diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d76045fe8e..dea0289e73 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -282,6 +282,8 @@ jobs: env: JOB_NAME: "Upgrade-Tests-${{ matrix.gateway-mode }}" OVN_HA: "false" + PLATFORM_IPV4_SUPPORT: "true" + PLATFORM_IPV6_SUPPORT: "false" KIND_IPV4_SUPPORT: "true" KIND_IPV6_SUPPORT: "false" OVN_HYBRID_OVERLAY_ENABLE: "false" @@ -453,7 +455,7 @@ jobs: - {"target": "external-gateway", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "2br", "ic": "ic-single-node-zones"} - {"target": "external-gateway", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "kv-live-migration", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled", "num-workers": "3", "network-segmentation": "enable-network-segmentation"} - - {"target": "kv-live-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": "enable-network-segmentation"} + - {"target": "kv-live-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": "enable-network-segmentation", "routeadvertisements": "true"} - {"target": "control-plane", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "forwarding": "disable-forwarding"} - {"target": "network-segmentation", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "forwarding": "disable-forwarding"} - {"target": "network-segmentation", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} @@ -475,11 +477,11 @@ jobs: KIND_INSTALL_METALLB: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || matrix.target == 'network-segmentation' }}" OVN_GATEWAY_MODE: "${{ matrix.gateway-mode }}" OVN_SECOND_BRIDGE: "${{ matrix.second-bridge == '2br' }}" - KIND_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" - KIND_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' || matrix.target == 'kv-live-migration' || matrix.target == 'network-segmentation' || matrix.target == 'tools' || matrix.target == 'multi-homing-helm' || matrix.target == 'traffic-flow-test-only' || matrix.routeadvertisements != '' }}" ENABLE_NETWORK_SEGMENTATION: "${{ matrix.target == 'network-segmentation' || matrix.network-segmentation == 'enable-network-segmentation' }}" DISABLE_UDN_HOST_ISOLATION: "true" + PLATFORM_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" + PLATFORM_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" KIND_INSTALL_KUBEVIRT: "${{ matrix.target == 'kv-live-migration' }}" OVN_COMPACT_MODE: "${{ matrix.target == 'compact-mode' }}" 
OVN_DUMMY_GATEWAY_BRIDGE: "${{ matrix.target == 'compact-mode' }}" @@ -489,6 +491,7 @@ jobs: OVN_DISABLE_FORWARDING: "${{ matrix.forwarding == 'disable-forwarding' }}" USE_HELM: "${{ matrix.target == 'control-plane-helm' || matrix.target == 'multi-homing-helm' }}" OVN_ENABLE_DNSNAMERESOLVER: "${{ matrix.dns-name-resolver == 'enable-dns-name-resolver' }}" + OVN_NETWORK_QOS_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' }}" TRAFFIC_FLOW_TESTS: "${{ matrix.traffic-flow-tests }}" ENABLE_ROUTE_ADVERTISEMENTS: "${{ matrix.routeadvertisements != '' }}" ADVERTISE_DEFAULT_NETWORK: "${{ matrix.routeadvertisements == 'advertise-default' }}" @@ -550,7 +553,8 @@ jobs: echo "GOPATH=$GOPATH" >> $GITHUB_ENV echo "$GOPATH/bin" >> $GITHUB_PATH if [ $OVN_SECOND_BRIDGE == "true" ]; then - echo OVN_TEST_EX_GW_NETWORK=kindexgw >> $GITHUB_ENV + # must be "greater" lexicographically than network "kind", therefore the external gateway network is named xgw + echo OVN_TEST_EX_GW_NETWORK=xgw >> $GITHUB_ENV echo OVN_ENABLE_EX_GW_NETWORK_BRIDGE=true >> $GITHUB_ENV fi if [[ "$JOB_NAME" == *"shard-conformance"* ]] && [ "$ADVERTISE_DEFAULT_NETWORK" == "true" ]; then @@ -629,7 +633,9 @@ jobs: make -C test control-plane WHAT="Kubevirt Virtual Machines" elif [ "${{ matrix.target }}" == "control-plane-helm" ]; then make -C test control-plane - make -C test conformance + if [ "${{ matrix.ipfamily }}" != "ipv6" ]; then + make -C test conformance + fi elif [ "${{ matrix.target }}" == "network-segmentation" ]; then make -C test control-plane WHAT="Network Segmentation" elif [ "${{ matrix.target }}" == "bgp" ]; then @@ -688,8 +694,8 @@ jobs: env: JOB_NAME: "DualStack-conversion-shared-${{ matrix.ha }}-${{ matrix.interconnect }}" OVN_HA: "${{ matrix.ha == 'HA' }}" - KIND_IPV4_SUPPORT: "true" - KIND_IPV6_SUPPORT: "false" + PLATFORM_IPV4_SUPPORT: "true" + PLATFORM_IPV6_SUPPORT: "false" OVN_HYBRID_OVERLAY_ENABLE: "false" OVN_GATEWAY_MODE: "shared" OVN_MULTICAST_ENABLE: "false" diff --git a/Dockerfile.base b/Dockerfile.base index cdcb3f65f9..4f551a411d 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -12,25 +12,28 @@ RUN dnf --setopt=retries=2 --setopt=timeout=2 install -y --nodocs \ selinux-policy procps-ng && \ dnf clean all -ARG ovsver=3.5.0-0.9.el9fdp -ARG ovnver=24.09.2-41.el9fdp +# NOTE: OVS is not pinned to a particular patch version in order to stay in +# sync with the OVS running on the host (it is not strictly necessary, but +# reduces the number of variables in the system) and to receive all the CVE and +# bug fixes automatically. +ARG ovsver=3.5 +ARG ovnver=24.09.2-69.el9fdp # NOTE: Ensure that the versions of OVS and OVN are overriden for OKD in each of the subsequent layers. # Centos and RHEL releases for ovn are built out of sync, so please make sure to bump for OKD with # the corresponding Centos version when updating the OCP version. -ARG ovsver_okd=3.5.0-10.el9s +ARG ovsver_okd=3.5 # We are not bumping the OVN version for OKD since the FDP release is not done yet. ARG ovnver_okd=24.09.1-10.el9s RUN INSTALL_PKGS="iptables nftables" && \ source /etc/os-release && \ [ "${ID}" == "centos" ] && ovsver=$ovsver_okd && ovnver=$ovnver_okd; \ - ovsver_short=$(echo "$ovsver" | cut -d'.' -f1,2) && \ ovnver_short=$(echo "$ovnver" | cut -d'.'
-f1,2) && \ dnf --setopt=retries=2 --setopt=timeout=2 install -y --nodocs $INSTALL_PKGS && \ - dnf --setopt=retries=2 --setopt=timeout=2 install -y --nodocs "openvswitch$ovsver_short = $ovsver" "python3-openvswitch$ovsver_short = $ovsver" && \ + dnf --setopt=retries=2 --setopt=timeout=2 install -y --nodocs "openvswitch$ovsver" "python3-openvswitch$ovsver" && \ dnf --setopt=retries=2 --setopt=timeout=2 install -y --nodocs "ovn$ovnver_short = $ovnver" "ovn$ovnver_short-central = $ovnver" "ovn$ovnver_short-host = $ovnver" && \ dnf clean all && rm -rf /var/cache/* && \ - sed 's/%/"/g' <<<"%openvswitch$ovsver_short-devel = $ovsver% %openvswitch$ovsver_short-ipsec = $ovsver% %ovn$ovnver_short-vtep = $ovnver%" > /more-pkgs + sed 's/%/"/g' <<<"%openvswitch$ovsver-devel% %openvswitch$ovsver-ipsec% %ovn$ovnver_short-vtep = $ovnver%" > /more-pkgs RUN mkdir -p /var/run/openvswitch && \ mkdir -p /var/run/ovn && \ diff --git a/MAINTAINERS.md b/MAINTAINERS.md index cf290219f5..5c530c5b86 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -2,11 +2,17 @@ The current Maintainers Group for the ovn-kubernetes Project consists of: | Name | Employer | Responsibilities | | ---- | -------- | ---------------- | -| [Dan Williams](https://github.com/dcbw) | Red Hat | All things ovnkube | | [Girish Moodalbail](https://github.com/girishmg) | NVIDIA | All things ovnkube | | [Jaime CaamaƱo Ruiz](https://github.com/jcaamano) | Red Hat | All things ovnkube | +| [Nadia Pinaeva](https://github.com/npinaeva) | Red Hat | All things ovnkube | | [Surya Seetharaman](https://github.com/tssurya) | Red Hat | All things ovnkube | | [Tim Rozet](https://github.com/trozet) | Red Hat | All things ovnkube | See [CONTRIBUTING.md](./CONTRIBUTING.md) for general contribution guidelines. See [GOVERNANCE.md](./GOVERNANCE.md) for governance guidelines and maintainer responsibilities. 
+ +Emeritus Maintainers + +| Name | Employer | Responsibilities | +| ---- | -------- | ---------------- | +| [Dan Williams](https://github.com/dcbw) | Independent | All things ovnkube | \ No newline at end of file diff --git a/contrib/kind-common b/contrib/kind-common index 1c9fd448d4..66cc078d3e 100644 --- a/contrib/kind-common +++ b/contrib/kind-common @@ -161,10 +161,10 @@ EOF pip install -r dev-env/requirements.txt local ip_family ipv6_network - if [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then ip_family="dual" ipv6_network="--ipv6 --subnet=${METALLB_CLIENT_NET_SUBNET_IPV6}" - elif [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV6_SUPPORT" == true ]; then ip_family="ipv6" ipv6_network="--ipv6 --subnet=${METALLB_CLIENT_NET_SUBNET_IPV6}" else @@ -177,7 +177,7 @@ EOF docker network rm -f clientnet docker network create --subnet="${METALLB_CLIENT_NET_SUBNET_IPV4}" ${ipv6_network} --driver bridge clientnet docker network connect clientnet frr - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Enable IPv6 forwarding in FRR docker exec frr sysctl -w net.ipv6.conf.all.forwarding=1 fi @@ -218,10 +218,10 @@ EOF KIND_NODES=$(kind_get_nodes) for n in ${KIND_NODES}; do - if [ "$KIND_IPV4_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then docker exec "${n}" ip route add "${client_subnets_v4}" via "${kind_network_v4}" fi - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then docker exec "${n}" ip -6 route add "${client_subnets_v6}" via "${kind_network_v6}" fi done @@ -229,10 +229,10 @@ EOF # for now, we only run one test with metalLB load balancer for which this # one svcVIP (192.168.10.0/fc00:f853:ccd:e799::) is more than enough since at a time we will only # have one load balancer service - if [ "$KIND_IPV4_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then docker exec lbclient ip route add 192.168.10.0 via "${client_network_v4}" dev eth0 fi - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then docker exec lbclient ip -6 route add fc00:f853:ccd:e799:: via "${client_network_v6}" dev eth0 fi sleep 30 @@ -284,7 +284,7 @@ delete_metallb_dir() { kubectl_wait_pods() { # IPv6 cluster seems to take a little longer to come up, so extend the wait time. OVN_TIMEOUT=300 - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then OVN_TIMEOUT=480 fi @@ -450,7 +450,7 @@ install_ipamclaim_crd() { docker_create_second_disconnected_interface() { echo "adding second interfaces to nodes" - local bridge_name="${1:-kindexgw}" + local bridge_name="${1:-xgw}" echo "bridge: $bridge_name" if [ "${OCI_BIN}" = "podman" ]; then @@ -688,7 +688,7 @@ deploy_frr_external_container() { # Add route reflector client config sed -i '/remote-as 64512/a \ neighbor {{ . }} route-reflector-client' frr/frr.conf.tmpl - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Check if IPv6 address-family section exists if ! 
grep -q 'address-family ipv6 unicast' frr/frr.conf.tmpl; then # Add IPv6 address-family section if it doesn't exist @@ -706,7 +706,7 @@ deploy_frr_external_container() { fi ./demo.sh popd || exit 1 - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Enable IPv6 forwarding in FRR docker exec frr sysctl -w net.ipv6.conf.all.forwarding=1 fi @@ -725,10 +725,10 @@ deploy_bgp_external_server() { # | ovn-worker2 | from default pod network) # --------------------- local ip_family ipv6_network - if [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then ip_family="dual" ipv6_network="--ipv6 --subnet=${BGP_SERVER_NET_SUBNET_IPV6}" - elif [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV6_SUPPORT" == true ]; then ip_family="ipv6" ipv6_network="--ipv6 --subnet=${BGP_SERVER_NET_SUBNET_IPV6}" else @@ -746,7 +746,7 @@ deploy_bgp_external_server() { bgp_network_frr_v4=$($OCI_BIN inspect -f '{{index .NetworkSettings.Networks "bgpnet" "IPAddress"}}' frr) echo "FRR kind network IPv4: ${bgp_network_frr_v4}" $OCI_BIN exec bgpserver ip route replace default via "$bgp_network_frr_v4" - if [ "$KIND_IPV6_SUPPORT" == true ] ; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ] ; then bgp_network_frr_v6=$($OCI_BIN inspect -f '{{index .NetworkSettings.Networks "bgpnet" "GlobalIPv6Address"}}' frr) echo "FRR kind network IPv6: ${bgp_network_frr_v6}" $OCI_BIN exec bgpserver ip -6 route replace default via "$bgp_network_frr_v6" @@ -781,7 +781,7 @@ install_ffr_k8s() { # Allow receiving the bgp external server's prefix sed -i '/mode: filtered/a\ prefixes:\n - prefix: '"${BGP_SERVER_NET_SUBNET_IPV4}"'' receive_filtered.yaml # If IPv6 is enabled, add the IPv6 prefix as well - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Find all line numbers where the IPv4 prefix is defined IPv6_LINE=" - prefix: ${BGP_SERVER_NET_SUBNET_IPV6}" # Process each occurrence of the IPv4 prefix @@ -827,7 +827,7 @@ EOF # Get subnet information subnet_json=$(kubectl get node $node -o jsonpath='{.metadata.annotations.k8s\.ovn\.org/node-subnets}') - if [ "$KIND_IPV4_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then # Extract IPv4 address (first address) node_ipv4=$(echo "$node_ips" | awk '{print $1}') ipv4_subnet=$(echo "$subnet_json" | jq -r '.default[0]') @@ -840,7 +840,7 @@ EOF fi # Add IPv6 route if enabled - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Extract IPv6 address (second address, if present) node_ipv6=$(echo "$node_ips" | awk '{print $2}') ipv6_subnet=$(echo "$subnet_json" | jq -r '.default[1] // empty') diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index 462f4bbf03..c682c94ac7 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -27,6 +27,7 @@ set_default_params() { export KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} export ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} export ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} + export OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} export KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} export OVN_IMAGE=${OVN_IMAGE:-'ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu:helm'} @@ -79,7 +80,7 @@ set_default_params() { fi # Hard code ipv4 support until IPv6 is implemented - export KIND_IPV4_SUPPORT=true + export PLATFORM_IPV4_SUPPORT=true 
export OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} } @@ -98,6 +99,7 @@ usage() { echo " [ -ikv | --install-kubevirt ]" echo " [ -mne | --multi-network-enable ]" echo " [ -nse | --network-segmentation-enable ]" + echo " [ -nqe | --network-qos-enable ]" echo " [ -wk | --num-workers ]" echo " [ -ic | --enable-interconnect]" echo " [ -npz | --node-per-zone ]" @@ -119,6 +121,7 @@ usage() { echo "-ikv | --install-kubevirt Install kubevirt" echo "-mne | --multi-network-enable Enable multi networks. DEFAULT: Disabled" echo "-nse | --network-segmentation-enable Enable network segmentation. DEFAULT: Disabled" + echo "-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled" echo "-ha | --ha-enabled Enable high availability. DEFAULT: HA Disabled" echo "-wk | --num-workers Number of worker nodes. DEFAULT: 2 workers" echo "-cn | --cluster-name Configure the kind cluster's name" @@ -165,6 +168,8 @@ parse_args() { ;; -nse | --network-segmentation-enable) ENABLE_NETWORK_SEGMENTATION=true ;; + -nqe | --network-qos-enable ) OVN_NETWORK_QOS_ENABLE=true + ;; -ha | --ha-enabled ) OVN_HA=true KIND_NUM_MASTER=3 ;; @@ -218,6 +223,7 @@ print_params() { echo "KIND_REMOVE_TAINT = $KIND_REMOVE_TAINT" echo "ENABLE_MULTI_NET = $ENABLE_MULTI_NET" echo "ENABLE_NETWORK_SEGMENTATION = $ENABLE_NETWORK_SEGMENTATION" + echo "OVN_NETWORK_QOS_ENABLE = $OVN_NETWORK_QOS_ENABLE" echo "OVN_IMAGE = $OVN_IMAGE" echo "KIND_NUM_MASTER = $KIND_NUM_MASTER" echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" @@ -242,7 +248,7 @@ check_dependencies() { done # check for currently unsupported features - [ "${KIND_IPV6_SUPPORT}" == "true" ] && { &>1 echo "Fatal: KIND_IPV6_SUPPORT support not implemented yet"; exit 1; } ||: + [ "${PLATFORM_IPV6_SUPPORT}" == "true" ] && { &>1 echo "Fatal: PLATFORM_IPV6_SUPPORT support not implemented yet"; exit 1; } ||: } helm_prereqs() { @@ -414,6 +420,7 @@ helm install ovn-kubernetes . -f "${value_file}" \ --set global.enableObservability=$(if [ "${OVN_OBSERV_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.emptyLbEvents=$(if [ "${OVN_EMPTY_LB_EVENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableDNSNameResolver=$(if [ "${OVN_ENABLE_DNSNAMERESOLVER}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF ) diff --git a/contrib/kind.sh b/contrib/kind.sh index 166415a763..206cf5d942 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -82,6 +82,7 @@ usage() { echo " [-ic | --enable-interconnect]" echo " [-rae | --enable-route-advertisements]" echo " [-adv | --advertise-default-network]" + echo " [-nqe | --network-qos-enable]" echo " [--isolated]" echo " [-dns | --enable-dnsnameresolver]" echo " [-obs | --observability]" @@ -141,6 +142,7 @@ usage() { echo "-sm | --scale-metrics Enable scale metrics" echo "-cm | --compact-mode Enable compact mode, ovnkube master and node run in the same process." echo "-ic | --enable-interconnect Enable interconnect with each node as a zone (only valid if OVN_HA is false)" + echo "-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled." echo "--disable-ovnkube-identity Disable per-node cert and ovnkube-identity webhook" echo "-npz | --nodes-per-zone If interconnect is enabled, number of nodes per zone (Default 1). If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero." 
echo "-mtu Define the overlay mtu" @@ -218,9 +220,9 @@ parse_args() { ;; -kt | --keep-taint ) KIND_REMOVE_TAINT=false ;; - -n4 | --no-ipv4 ) KIND_IPV4_SUPPORT=false + -n4 | --no-ipv4 ) PLATFORM_IPV4_SUPPORT=false ;; - -i6 | --ipv6 ) KIND_IPV6_SUPPORT=true + -i6 | --ipv6 ) PLATFORM_IPV6_SUPPORT=true ;; -is | --ipsec ) ENABLE_IPSEC=true ;; @@ -346,6 +348,8 @@ parse_args() { -mtu ) shift OVN_MTU=$1 ;; + -nqe | --network-qos-enable ) OVN_NETWORK_QOS_ENABLE=true + ;; --delete ) delete exit ;; @@ -386,8 +390,8 @@ print_params() { echo "KIND_DNS_DOMAIN = $KIND_DNS_DOMAIN" echo "KIND_CONFIG_FILE = $KIND_CONFIG" echo "KIND_REMOVE_TAINT = $KIND_REMOVE_TAINT" - echo "KIND_IPV4_SUPPORT = $KIND_IPV4_SUPPORT" - echo "KIND_IPV6_SUPPORT = $KIND_IPV6_SUPPORT" + echo "PLATFORM_IPV4_SUPPORT = $PLATFORM_IPV4_SUPPORT" + echo "PLATFORM_IPV6_SUPPORT = $PLATFORM_IPV6_SUPPORT" echo "ENABLE_IPSEC = $ENABLE_IPSEC" echo "KIND_ALLOW_SYSTEM_WRITES = $KIND_ALLOW_SYSTEM_WRITES" echo "KIND_EXPERIMENTAL_PROVIDER = $KIND_EXPERIMENTAL_PROVIDER" @@ -439,6 +443,7 @@ print_params() { fi fi echo "OVN_ENABLE_OVNKUBE_IDENTITY = $OVN_ENABLE_OVNKUBE_IDENTITY" + echo "OVN_NETWORK_QOS_ENABLE = $OVN_NETWORK_QOS_ENABLE" echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" echo "OVN_MTU= $OVN_MTU" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" @@ -551,8 +556,8 @@ set_default_params() { KIND_DNS_DOMAIN=${KIND_DNS_DOMAIN:-"cluster.local"} KIND_CONFIG=${KIND_CONFIG:-${DIR}/kind.yaml.j2} KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} - KIND_IPV4_SUPPORT=${KIND_IPV4_SUPPORT:-true} - KIND_IPV6_SUPPORT=${KIND_IPV6_SUPPORT:-false} + PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} + PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} ENABLE_IPSEC=${ENABLE_IPSEC:-false} OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} OVN_DISABLE_SNAT_MULTIPLE_GWS=${OVN_DISABLE_SNAT_MULTIPLE_GWS:-false} @@ -603,6 +608,7 @@ set_default_params() { KIND_NUM_MASTER=1 OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-false} OVN_ENABLE_OVNKUBE_IDENTITY=${OVN_ENABLE_OVNKUBE_IDENTITY:-true} + OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then @@ -663,7 +669,7 @@ set_default_params() { } check_ipv6() { - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Collect additional IPv6 data on test environment ERROR_FOUND=false TMPVAR=$(sysctl net.ipv6.conf.all.forwarding | awk '{print $3}') @@ -699,23 +705,23 @@ check_ipv6() { } set_cluster_cidr_ip_families() { - if [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == false ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == false ]; then IP_FAMILY="" NET_CIDR=$NET_CIDR_IPV4 SVC_CIDR=$SVC_CIDR_IPV4 echo "IPv4 Only Support: --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" - elif [ "$KIND_IPV4_SUPPORT" == false ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV4_SUPPORT" == false ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then IP_FAMILY="ipv6" NET_CIDR=$NET_CIDR_IPV6 SVC_CIDR=$SVC_CIDR_IPV6 echo "IPv6 Only Support: --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" - elif [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then IP_FAMILY="dual" NET_CIDR=$NET_CIDR_IPV4,$NET_CIDR_IPV6 SVC_CIDR=$SVC_CIDR_IPV4,$SVC_CIDR_IPV6 echo "Dual Stack Support: --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" else - echo "Invalid setup. 
KIND_IPV4_SUPPORT and/or KIND_IPV6_SUPPORT must be true." + echo "Invalid setup. PLATFORM_IPV4_SUPPORT and/or PLATFORM_IPV6_SUPPORT must be true." exit 1 fi } @@ -901,6 +907,7 @@ create_ovn_kube_manifests() { --enable-multi-external-gateway=true \ --enable-ovnkube-identity="${OVN_ENABLE_OVNKUBE_IDENTITY}" \ --enable-persistent-ips=true \ + --network-qos-enable="${OVN_NETWORK_QOS_ENABLE}" \ --mtu="${OVN_MTU}" \ --enable-dnsnameresolver="${OVN_ENABLE_DNSNAMERESOLVER}" \ --mtu="${OVN_MTU}" \ @@ -985,6 +992,7 @@ install_ovn() { run_kubectl apply -f k8s.ovn.org_egressqoses.yaml run_kubectl apply -f k8s.ovn.org_egressservices.yaml run_kubectl apply -f k8s.ovn.org_adminpolicybasedexternalroutes.yaml + run_kubectl apply -f k8s.ovn.org_networkqoses.yaml run_kubectl apply -f k8s.ovn.org_userdefinednetworks.yaml run_kubectl apply -f k8s.ovn.org_clusteruserdefinednetworks.yaml run_kubectl apply -f k8s.ovn.org_routeadvertisements.yaml @@ -1088,11 +1096,11 @@ docker_create_second_interface() { echo "adding second interfaces to nodes" # Create the network as dual stack, regardless of the type of the deployment. Ignore if already exists. - "$OCI_BIN" network create --ipv6 --driver=bridge kindexgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true + "$OCI_BIN" network create --ipv6 --driver=bridge xgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") for n in $KIND_NODES; do - "$OCI_BIN" network connect kindexgw "$n" + "$OCI_BIN" network connect xgw "$n" done } @@ -1100,7 +1108,7 @@ docker_create_second_interface() { # and makes sure the control-plane node is reachable by substituting 127.0.0.1 # with the control-plane container's IP run_script_in_container() { - if [ "$KIND_IPV4_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then local master_ip=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${KIND_CLUSTER_NAME}-control-plane | head -n 1) sed -i -- "s/server: .*/server: https:\/\/$master_ip:6443/g" $KUBECONFIG else diff --git a/dist/images/Dockerfile.fedora b/dist/images/Dockerfile.fedora index ed0a696741..e1789bd1e5 100644 --- a/dist/images/Dockerfile.fedora +++ b/dist/images/Dockerfile.fedora @@ -75,7 +75,7 @@ RUN git log -n 1 # Stage to download OVN RPMs from koji # ######################################## FROM fedora:41 AS kojidownloader -ARG ovnver=ovn-24.09.1-10.fc41 +ARG ovnver=ovn-24.09.2-71.fc41 USER root diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index d6d883dff0..95e4a503e8 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -95,6 +95,7 @@ OVN_ENABLE_INTERCONNECT= OVN_ENABLE_OVNKUBE_IDENTITY="true" OVN_ENABLE_PERSISTENT_IPS= OVN_ENABLE_SVC_TEMPLATE_SUPPORT="true" +OVN_NETWORK_QOS_ENABLE= OVN_ENABLE_DNSNAMERESOLVER="false" OVN_NOHOSTSUBNET_LABEL="" OVN_DISABLE_REQUESTEDCHASSIS="false" @@ -362,6 +363,9 @@ while [ "$1" != "" ]; do --enable-svc-template-support) OVN_ENABLE_SVC_TEMPLATE_SUPPORT=$VALUE ;; + --network-qos-enable) + OVN_NETWORK_QOS_ENABLE=$VALUE + ;; --enable-dnsnameresolver) OVN_ENABLE_DNSNAMERESOLVER=$VALUE ;; @@ -565,6 +569,9 @@ echo "ovn_enable_persistent_ips: ${ovn_enable_persistent_ips}" ovn_enable_svc_template_support=${OVN_ENABLE_SVC_TEMPLATE_SUPPORT} echo "ovn_enable_svc_template_support: ${ovn_enable_svc_template_support}" +ovn_network_qos_enable=${OVN_NETWORK_QOS_ENABLE} +echo "ovn_network_qos_enable: ${ovn_network_qos_enable}" + ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER} echo 
"ovn_enable_dnsnameresolver: ${ovn_enable_dnsnameresolver}" @@ -627,6 +634,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_observ_enable=${ovn_observ_enable} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovnkube_app_name=ovnkube-node \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node.yaml @@ -680,6 +688,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_observ_enable=${ovn_observ_enable} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovnkube_app_name=ovnkube-node-dpu \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node-dpu.yaml @@ -722,6 +731,7 @@ ovn_image=${image} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovnkube_app_name=ovnkube-node-dpu-host \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node-dpu-host.yaml @@ -768,6 +778,7 @@ ovn_image=${ovnkube_image} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ @@ -812,6 +823,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovn_v4_transit_switch_subnet=${ovn_v4_transit_switch_subnet} \ ovn_v6_transit_switch_subnet=${ovn_v6_transit_switch_subnet} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ @@ -909,6 +921,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovn_northd_backoff_interval=${ovn_northd_backoff_interval} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ @@ -974,6 +987,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ + ovn_network_qos_enable=${ovn_network_qos_enable} \ ovn_northd_backoff_interval=${ovn_enable_backoff_interval} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ @@ -1060,6 +1074,7 @@ cp ../templates/k8s.ovn.org_egressips.yaml.j2 ${output_dir}/k8s.ovn.org_egressip cp ../templates/k8s.ovn.org_egressqoses.yaml.j2 ${output_dir}/k8s.ovn.org_egressqoses.yaml cp ../templates/k8s.ovn.org_egressservices.yaml.j2 ${output_dir}/k8s.ovn.org_egressservices.yaml cp ../templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 
${output_dir}/k8s.ovn.org_adminpolicybasedexternalroutes.yaml +cp ../templates/k8s.ovn.org_networkqoses.yaml.j2 ${output_dir}/k8s.ovn.org_networkqoses.yaml cp ../templates/k8s.ovn.org_userdefinednetworks.yaml.j2 ${output_dir}/k8s.ovn.org_userdefinednetworks.yaml cp ../templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 ${output_dir}/k8s.ovn.org_clusteruserdefinednetworks.yaml cp ../templates/k8s.ovn.org_routeadvertisements.yaml.j2 ${output_dir}/k8s.ovn.org_routeadvertisements.yaml diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index e4327397ef..ae77d2f13b 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -311,6 +311,9 @@ ovnkube_compact_mode_enable=${OVNKUBE_COMPACT_MODE_ENABLE:-false} ovn_northd_backoff_interval=${OVN_NORTHD_BACKOFF_INTERVAL:-"300"} # OVN_ENABLE_SVC_TEMPLATE_SUPPORT - enable svc template support ovn_enable_svc_template_support=${OVN_ENABLE_SVC_TEMPLATE_SUPPORT:-true} + +#OVN_NETWORK_QOS_ENABLE - enable network QoS for ovn-kubernetes +ovn_network_qos_enable=${OVN_NETWORK_QOS_ENABLE:-false} # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver support ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER:-false} # OVN_OBSERV_ENABLE - enable observability for ovnkube @@ -1292,6 +1295,12 @@ ovn-master() { fi echo "ovn_disable_requestedchassis_flag=${ovn_disable_requestedchassis_flag}" + network_qos_enabled_flag= + if [[ ${ovn_network_qos_enable} == "true" ]]; then + network_qos_enabled_flag="--enable-network-qos" + fi + echo "network_qos_enabled_flag=${network_qos_enabled_flag}" + init_node_flags= if [[ ${ovnkube_compact_mode_enable} == "true" ]]; then init_node_flags="--init-node ${K8S_NODE} --nodeport" @@ -1344,6 +1353,7 @@ ovn-master() { ${ovn_v6_join_subnet_opt} \ ${ovn_v6_masquerade_subnet_opt} \ ${persistent_ips_enabled_flag} \ + ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ ${nohostsubnet_label_option} \ ${ovn_disable_requestedchassis_flag} \ @@ -1598,6 +1608,12 @@ ovnkube-controller() { fi echo "ovn_enable_svc_template_support_flag=${ovn_enable_svc_template_support_flag}" + network_qos_enabled_flag= + if [[ ${ovn_network_qos_enable} == "true" ]]; then + network_qos_enabled_flag="--enable-network-qos" + fi + echo "network_qos_enabled_flag=${network_qos_enabled_flag}" + ovn_enable_dnsnameresolver_flag= if [[ ${ovn_enable_dnsnameresolver} == "true" ]]; then ovn_enable_dnsnameresolver_flag="--enable-dns-name-resolver" @@ -1642,6 +1658,7 @@ ovnkube-controller() { ${ovn_v4_masquerade_subnet_opt} \ ${ovn_v6_join_subnet_opt} \ ${ovn_v6_masquerade_subnet_opt} \ + ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --gateway-mode=${ovn_gateway_mode} \ @@ -2019,6 +2036,12 @@ ovnkube-controller-with-node() { fi echo "ovn_enable_svc_template_support_flag=${ovn_enable_svc_template_support_flag}" + network_qos_enabled_flag= + if [[ ${ovn_network_qos_enable} == "true" ]]; then + network_qos_enabled_flag="--enable-network-qos" + fi + echo "network_qos_enabled_flag=${network_qos_enabled_flag}" + ovn_enable_dnsnameresolver_flag= if [[ ${ovn_enable_dnsnameresolver} == "true" ]]; then ovn_enable_dnsnameresolver_flag="--enable-dns-name-resolver" @@ -2081,6 +2104,7 @@ ovnkube-controller-with-node() { ${routable_mtu_flag} \ ${sflow_targets} \ ${ssl_opts} \ + ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --export-ovs-metrics \ @@ -2264,6 +2288,12 @@ ovn-cluster-manager() { 
fi echo "empty_lb_events_flag=${empty_lb_events_flag}" + network_qos_enabled_flag= + if [[ ${ovn_network_qos_enable} == "true" ]]; then + network_qos_enabled_flag="--enable-network-qos" + fi + echo "network_qos_enabled_flag=${network_qos_enabled_flag}" + ovn_enable_dnsnameresolver_flag= if [[ ${ovn_enable_dnsnameresolver} == "true" ]]; then ovn_enable_dnsnameresolver_flag="--enable-dns-name-resolver" @@ -2295,7 +2325,9 @@ ovn-cluster-manager() { ${ovn_v6_masquerade_subnet_opt} \ ${ovn_v4_transit_switch_subnet_opt} \ ${ovn_v6_transit_switch_subnet_opt} \ + ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ + --gateway-mode=${ovn_gateway_mode} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --host-network-namespace ${ovn_host_network_namespace} \ --logfile-maxage=${ovnkube_logfile_maxage} \ @@ -2655,6 +2687,12 @@ ovn-node() { fi echo "ovn_conntrack_zone_flag=${ovn_conntrack_zone_flag}" + network_qos_enabled_flag= + if [[ ${ovn_network_qos_enable} == "true" ]]; then + network_qos_enabled_flag="--enable-network-qos" + fi + echo "network_qos_enabled_flag=${network_qos_enabled_flag}" + ovn_v4_masquerade_subnet_opt= if [[ -n ${ovn_v4_masquerade_subnet} ]]; then ovn_v4_masquerade_subnet_opt="--gateway-v4-masquerade-subnet=${ovn_v4_masquerade_subnet}" @@ -2705,6 +2743,7 @@ ovn-node() { ${ovn_unprivileged_flag} \ ${routable_mtu_flag} \ ${sflow_targets} \ + ${network_qos_enabled_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --export-ovs-metrics \ --gateway-mode=${ovn_gateway_mode} ${ovn_gateway_opts} \ diff --git a/dist/templates/k8s.ovn.org_networkqoses.yaml.j2 b/dist/templates/k8s.ovn.org_networkqoses.yaml.j2 new file mode 100644 index 0000000000..f205c8028f --- /dev/null +++ b/dist/templates/k8s.ovn.org_networkqoses.yaml.j2 @@ -0,0 +1,776 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.4 + name: networkqoses.k8s.ovn.org +spec: + group: k8s.ovn.org + names: + kind: NetworkQoS + listKind: NetworkQoSList + plural: networkqoses + singular: networkqos + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.status + name: Status + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + NetworkQoS is a CRD that allows the user to define a DSCP marking and metering + for pods ingress/egress traffic on its namespace to specified CIDRs, + protocol and port. Traffic belong these pods will be checked against + each Rule in the namespace's NetworkQoS, and if there is a match the traffic + is marked with relevant DSCP value and enforcing specified policing + parameters. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the desired state of NetworkQoS + properties: + egress: + description: |- + egress a collection of Egress NetworkQoS rule objects. A total of 20 rules will + be allowed in each NetworkQoS instance. The relative precedence of egress rules + within a single NetworkQos object (all of which share the priority) will be + determined by the order in which the rule is written. Thus, a rule that appears + first in the list of egress rules would take the lower precedence. + items: + properties: + bandwidth: + description: |- + Bandwidth controls the maximum of rate traffic that can be sent + or received on the matching packets. + properties: + burst: + description: |- + burst The value of burst rate limit in kilobits. + This also needs rate to be specified. + format: int32 + maximum: 4294967295 + minimum: 1 + type: integer + rate: + description: |- + rate The value of rate limit in kbps. Traffic over the limit + will be dropped. + format: int32 + maximum: 4294967295 + minimum: 1 + type: integer + type: object + classifier: + description: |- + classifier The classifier on which packets should match + to apply the NetworkQoS Rule. + This field is optional, and in case it is not set the rule is applied + to all egress traffic regardless of the destination. + properties: + ports: + items: + description: |- + Port specifies destination protocol and port on which NetworkQoS + rule is applied + properties: + port: + description: port that the traffic must match + format: int32 + maximum: 65535 + minimum: 1 + type: integer + protocol: + description: protocol (tcp, udp, sctp) that the traffic + must match. + pattern: ^TCP|UDP|SCTP$ + type: string + type: object + type: array + to: + items: + description: |- + Destination describes a peer to apply NetworkQoS configuration for the outgoing traffic. + Only certain combinations of fields are allowed. + properties: + ipBlock: + description: |- + ipBlock defines policy on a particular IPBlock. If this field is set then + neither of the other fields can be. + properties: + cidr: + description: |- + cidr is a string representing the IPBlock + Valid examples are "192.168.1.0/24" or "2001:db8::/64" + type: string + except: + description: |- + except is a slice of CIDRs that should not be included within an IPBlock + Valid examples are "192.168.1.0/24" or "2001:db8::/64" + Except values will be rejected if they are outside the cidr range + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - cidr + type: object + namespaceSelector: + description: |- + namespaceSelector selects namespaces using cluster-scoped labels. This field follows + standard label selector semantics; if present but empty, it selects all namespaces. + + If podSelector is also set, then the NetworkQoS as a whole selects + the pods matching podSelector in the namespaces selected by namespaceSelector. + Otherwise it selects all pods in the namespaces selected by namespaceSelector. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. 
+ type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + podSelector: + description: |- + podSelector is a label selector which selects pods. This field follows standard label + selector semantics; if present but empty, it selects all pods. + + If namespaceSelector is also set, then the NetworkQoS as a whole selects + the pods matching podSelector in the Namespaces selected by NamespaceSelector. + Otherwise it selects the pods matching podSelector in the NetworkQoS's own namespace. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: object + x-kubernetes-validations: + - message: Can't specify both podSelector/namespaceSelector + and ipBlock + rule: '!(has(self.ipBlock) && (has(self.podSelector) + || has(self.namespaceSelector)))' + type: array + type: object + dscp: + description: dscp marking value for matching pods' traffic. + maximum: 63 + minimum: 0 + type: integer + required: + - dscp + type: object + maxItems: 20 + type: array + networkSelectors: + description: |- + networkSelector selects the networks on which the pod IPs need to be added to the source address set. + NetworkQoS controller currently supports `NetworkAttachmentDefinitions` type only. + items: + description: NetworkSelector selects a set of networks. 
+ properties: + clusterUserDefinedNetworkSelector: + description: |- + clusterUserDefinedNetworkSelector selects ClusterUserDefinedNetworks when + NetworkSelectionType is 'ClusterUserDefinedNetworks'. + properties: + networkSelector: + description: |- + networkSelector selects ClusterUserDefinedNetworks by label. A null + selector will mot match anything, while an empty ({}) selector will match + all. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - networkSelector + type: object + networkAttachmentDefinitionSelector: + description: |- + networkAttachmentDefinitionSelector selects networks defined in the + selected NetworkAttachmentDefinitions when NetworkSelectionType is + 'SecondaryUserDefinedNetworks'. + properties: + namespaceSelector: + description: |- + namespaceSelector selects namespaces where the + NetworkAttachmentDefinitions are defined. This field follows standard + label selector semantics. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + networkSelector: + description: |- + networkSelector selects NetworkAttachmentDefinitions within the selected + namespaces by label. This field follows standard label selector + semantics. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - namespaceSelector + - networkSelector + type: object + networkSelectionType: + description: networkSelectionType determines the type of networks + selected. + enum: + - DefaultNetwork + - ClusterUserDefinedNetworks + - PrimaryUserDefinedNetworks + - SecondaryUserDefinedNetworks + - NetworkAttachmentDefinitions + type: string + primaryUserDefinedNetworkSelector: + description: |- + primaryUserDefinedNetworkSelector selects primary UserDefinedNetworks when + NetworkSelectionType is 'PrimaryUserDefinedNetworks'. + properties: + namespaceSelector: + description: |- + namespaceSelector select the primary UserDefinedNetworks that are servind + the selected namespaces. This field follows standard label selector + semantics. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - namespaceSelector + type: object + secondaryUserDefinedNetworkSelector: + description: |- + secondaryUserDefinedNetworkSelector selects secondary UserDefinedNetworks + when NetworkSelectionType is 'SecondaryUserDefinedNetworks'. + properties: + namespaceSelector: + description: |- + namespaceSelector selects namespaces where the secondary + UserDefinedNetworks are defined. This field follows standard label + selector semantics. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + networkSelector: + description: |- + networkSelector selects secondary UserDefinedNetworks within the selected + namespaces by label. This field follows standard label selector + semantics. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - namespaceSelector + - networkSelector + type: object + required: + - networkSelectionType + type: object + x-kubernetes-validations: + - message: 'Inconsistent selector: both networkSelectionType ClusterUserDefinedNetworks + and clusterUserDefinedNetworkSelector have to be set or neither' + rule: '!has(self.networkSelectionType) ? true : has(self.clusterUserDefinedNetworkSelector) + ? self.networkSelectionType == ''ClusterUserDefinedNetworks'' + : self.networkSelectionType != ''ClusterUserDefinedNetworks''' + - message: 'Inconsistent selector: both networkSelectionType PrimaryUserDefinedNetworks + and primaryUserDefinedNetworkSelector have to be set or neither' + rule: '!has(self.networkSelectionType) ? true : has(self.primaryUserDefinedNetworkSelector) + ? self.networkSelectionType == ''PrimaryUserDefinedNetworks'' + : self.networkSelectionType != ''PrimaryUserDefinedNetworks''' + - message: 'Inconsistent selector: both networkSelectionType SecondaryUserDefinedNetworks + and secondaryUserDefinedNetworkSelector have to be set or neither' + rule: '!has(self.networkSelectionType) ? true : has(self.secondaryUserDefinedNetworkSelector) + ? self.networkSelectionType == ''SecondaryUserDefinedNetworks'' + : self.networkSelectionType != ''SecondaryUserDefinedNetworks''' + - message: 'Inconsistent selector: both networkSelectionType NetworkAttachmentDefinitions + and networkAttachmentDefinitionSelector have to be set or neither' + rule: '!has(self.networkSelectionType) ? true : has(self.networkAttachmentDefinitionSelector) + ? self.networkSelectionType == ''NetworkAttachmentDefinitions'' + : self.networkSelectionType != ''NetworkAttachmentDefinitions''' + maxItems: 5 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - networkSelectionType + x-kubernetes-list-type: map + x-kubernetes-validations: + - message: networkSelector is immutable + rule: self == oldSelf + - message: Unsupported network selection type + rule: self.all(sel, sel.networkSelectionType == 'ClusterUserDefinedNetworks' + || sel.networkSelectionType == 'NetworkAttachmentDefinitions') + podSelector: + description: |- + podSelector applies the NetworkQoS rule only to the pods in the namespace whose label + matches this definition. This field is optional, and in case it is not set + results in the rule being applied to all pods in the namespace. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + priority: + description: |- + priority is a value from 0 to 100 and represents the NetworkQoS' priority. + QoSes with numerically higher priority takes precedence over those with lower. + maximum: 100 + minimum: 0 + type: integer + required: + - egress + - priority + type: object + status: + description: Status defines the observed state of NetworkQoS + properties: + conditions: + description: An array of condition objects indicating details about + status of NetworkQoS object. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + status: + description: A concise indication of whether the NetworkQoS resource + is applied with success. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 index af72a364a2..2373f38cff 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -181,6 +181,8 @@ spec: value: "{{ ovn_v6_transit_switch_subnet }}" - name: OVN_ENABLE_PERSISTENT_IPS value: "{{ ovn_enable_persistent_ips }}" + - name: OVN_NETWORK_QOS_ENABLE + value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" # end of container diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 389a539dff..47ea81a6dc 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -308,6 +308,8 @@ spec: key: host_network_namespace - name: OVN_ENABLE_PERSISTENT_IPS value: "{{ ovn_enable_persistent_ips }}" + - name: OVN_NETWORK_QOS_ENABLE + value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" # end of container diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index 8fea157646..98591a5ac1 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -217,6 +217,8 @@ spec: value: "{{ ovn_ex_gw_networking_interface }}" - name: OVN_ENABLE_OVNKUBE_IDENTITY value: "{{ ovn_enable_ovnkube_identity }}" + - name: OVN_NETWORK_QOS_ENABLE + value: "{{ ovn_network_qos_enable }}" {% if ovnkube_app_name!="ovnkube-node-dpu-host" -%} - name: OVN_SSL_ENABLE value: "{{ ovn_ssl_en }}" diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index 3007b7c19c..d2d485cca7 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -460,6 +460,8 @@ spec: value: "{{ ovn_enable_ovnkube_identity }}" - name: OVN_ENABLE_SVC_TEMPLATE_SUPPORT value: "{{ ovn_enable_svc_template_support }}" + - name: OVN_NETWORK_QOS_ENABLE + value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" diff --git a/dist/templates/ovnkube-zone-controller.yaml.j2 b/dist/templates/ovnkube-zone-controller.yaml.j2 index d5cb2a1282..363ade3014 100644 --- a/dist/templates/ovnkube-zone-controller.yaml.j2 +++ b/dist/templates/ovnkube-zone-controller.yaml.j2 @@ -377,6 +377,8 @@ spec: value: "{{ ovn_enable_multi_external_gateway }}" - name: OVN_ENABLE_SVC_TEMPLATE_SUPPORT value: "{{ ovn_enable_svc_template_support }}" + - name: OVN_NETWORK_QOS_ENABLE + value: "{{ ovn_network_qos_enable }}" - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 index 3b347dd91c..44f7020165 100644 --- a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 +++ b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 @@ -76,6 +76,7 @@ rules: - userdefinednetworks - clusteruserdefinednetworks - routeadvertisements + - networkqoses verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.ovn.org"] resources: @@ -103,6 +104,7 @@ rules: - adminpolicybasedexternalroutes/status - egressfirewalls/status - egressqoses/status + - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: ["policy.networking.k8s.io"] resources: diff --git a/dist/templates/rbac-ovnkube-master.yaml.j2 
b/dist/templates/rbac-ovnkube-master.yaml.j2 index ab4c98fd89..c99d655f91 100644 --- a/dist/templates/rbac-ovnkube-master.yaml.j2 +++ b/dist/templates/rbac-ovnkube-master.yaml.j2 @@ -85,6 +85,7 @@ rules: - adminpolicybasedexternalroutes - userdefinednetworks - clusteruserdefinednetworks + - networkqoses verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.cni.cncf.io"] resources: @@ -119,6 +120,8 @@ rules: - clusteruserdefinednetworks - clusteruserdefinednetworks/status - clusteruserdefinednetworks/finalizers + - networkqoses + - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: [""] resources: diff --git a/dist/templates/rbac-ovnkube-node.yaml.j2 b/dist/templates/rbac-ovnkube-node.yaml.j2 index 1e9e413c27..b0edb15f17 100644 --- a/dist/templates/rbac-ovnkube-node.yaml.j2 +++ b/dist/templates/rbac-ovnkube-node.yaml.j2 @@ -163,6 +163,7 @@ rules: - adminpolicybasedexternalroutes/status - egressqoses/status - routeadvertisements/status + - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: ["policy.networking.k8s.io"] resources: @@ -185,6 +186,7 @@ rules: - userdefinednetworks - clusteruserdefinednetworks - routeadvertisements + - networkqoses verbs: [ "get", "list", "watch" ] {% if ovn_enable_ovnkube_identity == "true" -%} - apiGroups: ["certificates.k8s.io"] diff --git a/docs/ci/ci.md b/docs/ci/ci.md index 1fa5e2ce7b..b0f98c0762 100644 --- a/docs/ci/ci.md +++ b/docs/ci/ci.md @@ -160,8 +160,8 @@ export OVN_EMPTY_LB_EVENTS=[true|false] export OVN_HA=[true|false] export OVN_DISABLE_SNAT_MULTIPLE_GWS=[true|false] export OVN_GATEWAY_MODE=["local"|"shared"] -export KIND_IPV4_SUPPORT=[true|false] -export KIND_IPV6_SUPPORT=[true|false] +export PLATFORM_IPV4_SUPPORT=[true|false] +export PLATFORM_IPV6_SUPPORT=[true|false] # not required for the OVN Kind installation script, but export this already for later OVN_SECOND_BRIDGE=[true|false] ``` @@ -181,8 +181,8 @@ export OVN_EMPTY_LB_EVENTS=true export OVN_HA=false export OVN_DISABLE_SNAT_MULTIPLE_GWS=false export OVN_GATEWAY_MODE="local" -export KIND_IPV4_SUPPORT=true -export KIND_IPV6_SUPPORT=false +export PLATFORM_IPV4_SUPPORT=true +export PLATFORM_IPV6_SUPPORT=false # not required for the OVN Kind installation script, but export this already for later export OVN_SECOND_BRIDGE=false ``` @@ -353,13 +353,13 @@ ok github.com/ovn-org/ovn-kubernetes/test/e2e 12.371s ### IPv6 tests To skip the IPv4 only tests (in a IPv6 only deployment), pass the -`KIND_IPV6_SUPPORT=true` environmental variable to `make`: +`PLATFORM_IPV6_SUPPORT=true` environmental variable to `make`: ``` $ cd $GOPATH/src/github.com/ovn-org/ovn-kubernetes $ pushd test -$ KIND_IPV6_SUPPORT=true make shard-conformance +$ PLATFORM_IPV6_SUPPORT=true make shard-conformance $ popd ``` diff --git a/docs/features/network-qos.md b/docs/features/network-qos.md new file mode 100644 index 0000000000..4815359333 --- /dev/null +++ b/docs/features/network-qos.md @@ -0,0 +1,20 @@ +# Network QoS + +## Introduction + +To enable NetworkQoS, we will use Differentiated Services Code Point (DSCP) which allows us to classify packets by setting a 6-bit field in the IP header, effectively marking the priority of a given packet relative to other packets as "Critical", "High Priority", "Best Effort" and so on. + +## Problem Statement +The workloads running in Kubernetes using OVN-Kubernetes as a networking backend might have different requirements in handling network traffic. 
For example, a video streaming application needs low latency and jitter, whereas a storage application can tolerate packet loss. Hence NetworkQoS is essential in meeting these SLAs to provide better service quality. + +The workload traffic can be either east-west (pod to pod traffic) or north-south (pod to external traffic) in a Kubernetes cluster, which is limited by finite bandwidth. So NetworkQoS must ensure that high-priority applications get the necessary NetworkQoS marking in order to prevent network congestion. + +## Proposed Solution + +By introducing a new CRD `NetworkQoS`, users can specify a DSCP value for packets originating from pods in a given namespace heading to a specified Namespace Selector, Pod Selector, CIDR, Protocol and Port. This also supports metering for the packets by specifying bandwidth parameters `rate` and/or `burst`. +The CRD will be Namespaced, with multiple resources allowed per namespace. +The resources will be watched by ovn-k, which in turn will configure OVN's [QoS Table](https://man7.org/linux/man-pages/man5/ovn-nb.5.html#NetworkQoS_TABLE). +The `NetworkQoS` resource also has a `status` field, populated by ovn-k, which helps users identify whether the NetworkQoS rules are configured correctly in OVN. + +## Sources +- [OKEP-4380: Network QoS Support](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/okeps/okep-4380-network-qos.md) diff --git a/docs/images/VRFs.svg b/docs/images/VRFs.svg new file mode 100644 index 0000000000..855417fa73 --- /dev/null +++ b/docs/images/VRFs.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/egress-ip-l2-primary.svg b/docs/images/egress-ip-l2-primary.svg new file mode 100644 index 0000000000..e1454122a9 --- /dev/null +++ b/docs/images/egress-ip-l2-primary.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/egress-ip-vrf-lgw.svg b/docs/images/egress-ip-vrf-lgw.svg new file mode 100644 index 0000000000..cb6222bf6a --- /dev/null +++ b/docs/images/egress-ip-vrf-lgw.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/egress-ip-vrf-sgw.svg b/docs/images/egress-ip-vrf-sgw.svg new file mode 100644 index 0000000000..e2387ae778 --- /dev/null +++ b/docs/images/egress-ip-vrf-sgw.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/local-gw-node-setup-vrfs.svg b/docs/images/local-gw-node-setup-vrfs.svg new file mode 100644 index 0000000000..9b7ba269a5 --- /dev/null +++ b/docs/images/local-gw-node-setup-vrfs.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/multi-homing-l2-gw.svg b/docs/images/multi-homing-l2-gw.svg new file mode 100644 index 0000000000..f633254ac4 --- /dev/null +++ b/docs/images/multi-homing-l2-gw.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/installation/launching-ovn-kubernetes-on-kind.md b/docs/installation/launching-ovn-kubernetes-on-kind.md index 586bd9f747..c3a49ddde7 100644 --- a/docs/installation/launching-ovn-kubernetes-on-kind.md +++ b/docs/installation/launching-ovn-kubernetes-on-kind.md @@ -6,14 +6,14 @@ KIND (Kubernetes in Docker) deployment of OVN kubernetes is a fast and easy mean - 20 GB of free space in root file system - Docker run time or podman -- [KIND]( https://kubernetes.io/docs/setup/learning-environment/kind/ ) +- [KIND](https://kubernetes.io/docs/setup/learning-environment/kind/) - Installation instructions can be found at https://github.com/kubernetes-sigs/kind#installation-and-usage.
- NOTE: The OVN-Kubernetes [ovn-kubernetes/contrib/kind.sh](https://github.com/ovn-org/ovn-kubernetes/blob/master/contrib/kind.sh) and [ovn-kubernetes/contrib/kind.yaml](https://github.com/ovn-org/ovn-kubernetes/blob/master/contrib/kind.yaml) files provision port 11337. If firewalld is enabled, this port will need to be unblocked: ``` sudo firewall-cmd --permanent --add-port=11337/tcp; sudo firewall-cmd --reload ``` -- [kubectl]( https://kubernetes.io/docs/tasks/tools/install-kubectl/ ) +- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) - Python and pip - jq - openssl @@ -129,6 +129,7 @@ usage: kind.sh [[[-cf |--config-file ] [-kt|keep-taint] [-ha|--ha-enabled] [-cl |--ovn-loglevel-controller ] [-me|--multicast-enabled] [-ep |--experimental-provider ] | [-eb |--egress-gw-separate-bridge] + [-nqe|--network-qos-enable] [-h]] -cf | --config-file Name of the KIND J2 configuration file. @@ -170,6 +171,7 @@ usage: kind.sh [[[-cf |--config-file ] [-kt|keep-taint] [-ha|--ha-enabled] -cl | --ovn-loglevel-controller Log config for ovn-controller DEFAULT: '-vconsole:info'. -ep | --experimental-provider Use an experimental OCI provider such as podman, instead of docker. DEFAULT: Disabled. -eb | --egress-gw-separate-bridge The external gateway traffic uses a separate bridge. +-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled. -lr |--local-kind-registry Will start and connect a kind local registry to push/retrieve images --delete Delete current cluster --deploy Deploy ovn kubernetes without restarting kind @@ -322,7 +324,7 @@ $ cd ../dist/images/ $ make fedora-image $ cd ../../contrib/ -$ KIND_IPV4_SUPPORT=false KIND_IPV6_SUPPORT=true ./kind.sh +$ PLATFORM_IPV4_SUPPORT=false PLATFORM_IPV6_SUPPORT=true ./kind.sh ``` Once `kind.sh` completes, setup kube config file: @@ -426,7 +428,7 @@ $ cd ../dist/images/ $ make fedora-image $ cd ../../contrib/ -$ KIND_IPV4_SUPPORT=true KIND_IPV6_SUPPORT=true K8S_VERSION=v1.32.3 ./kind.sh +$ PLATFORM_IPV4_SUPPORT=true PLATFORM_IPV6_SUPPORT=true K8S_VERSION=v1.32.3 ./kind.sh ``` Once `kind.sh` completes, setup kube config file: diff --git a/docs/installation/launching-ovn-kubernetes-with-helm.md b/docs/installation/launching-ovn-kubernetes-with-helm.md index 1e658198a4..c25c107af5 100644 --- a/docs/installation/launching-ovn-kubernetes-with-helm.md +++ b/docs/installation/launching-ovn-kubernetes-with-helm.md @@ -291,6 +291,15 @@ false Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes + + global.enableNetworkQos + string +
+""
+
+ + Enables network QoS support from/to pods + global.enableMulticast string diff --git a/docs/okeps/okep-4380-network-qos.md b/docs/okeps/okep-4380-network-qos.md index 0f5384808a..d792bbd4ce 100644 --- a/docs/okeps/okep-4380-network-qos.md +++ b/docs/okeps/okep-4380-network-qos.md @@ -54,7 +54,7 @@ Another strategy for providing differential treatment to workload network traffi packets using DSCP (a 6-bit field in the IP header). These marked packets can then be handled differently by in-zone and in-cluster services. OVN supports this packet marking capability through OVS, allowing traffic to be classified based on specific match criteria. OVN marks the inner -packet’s IP header. So, the marking appears inside the GENEVE tunnel. There are ways to transfer +packet's IP header. So, the marking appears inside the GENEVE tunnel. There are ways to transfer this marking to outer header and influence how the underlay network fabric should handle such packets, however that is outside the scope of this proposal. @@ -156,9 +156,10 @@ whether NetworkQoS rules are configured correctly in OVN or not. ```go import ( -corev1 "k8s.io/api/core/v1" -networkingv1 "k8s.io/api/networking/v1" -metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + networkingv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" ) // +genclient @@ -175,141 +176,141 @@ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" // is marked with relevant DSCP value and enforcing specified policing // parameters. type NetworkQoS struct { -metav1.TypeMeta `json:",inline"` -metav1.ObjectMeta `json:"metadata,omitempty"` + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` -Spec Spec `json:"spec,omitempty"` -Status Status `json:"status,omitempty"` + Spec Spec `json:"spec,omitempty"` + Status Status `json:"status,omitempty"` } // Spec defines the desired state of NetworkQoS type Spec struct { -// netAttachRefs points to a list of objects which could be either NAD, UDN, or Cluster UDN. -// In the case of NAD, the network type could be of type Layer-3, Layer-2, or Localnet. -// If not specified, then the primary network of the selected Pods will be chosen. -// +optional -// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="netAttachRefs is immutable" -NetworkAttachmentRefs []corev1.ObjectReference `json:"netAttachRefs,omitempty"` - -// podSelector applies the NetworkQoS rule only to the pods in the namespace whose label -// matches this definition. This field is optional, and in case it is not set -// results in the rule being applied to all pods in the namespace. -// +optional -PodSelector metav1.LabelSelector `json:"podSelector,omitempty"` - -// priority is a value from 0 to 100 and represents the NetworkQoS' priority. -// QoSes with numerically higher priority takes precedence over those with lower. -// +kubebuilder:validation:Maximum:=100 -// +kubebuilder:validation:Minimum:=0 -Priority int `json:"priority"` - -// egress a collection of Egress NetworkQoS rule objects. A total of 20 rules will -// be allowed in each NetworkQoS instance. The relative precedence of egress rules -// within a single NetworkQos object (all of which share the priority) will be -// determined by the order in which the rule is written. Thus, a rule that appears -// first in the list of egress rules would take the lower precedence. 
-Egress []Rule `json:"egress"` + // networkSelector selects the networks on which the pod IPs need to be added to the source address set. + // NetworkQoS controller currently supports `NetworkAttachmentDefinitions` type only. + // +optional + // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="networkSelector is immutable" + NetworkSelectors crdtypes.NetworkSelectors `json:"networkSelectors,omitempty"` + + // podSelector applies the NetworkQoS rule only to the pods in the namespace whose label + // matches this definition. This field is optional, and in case it is not set + // results in the rule being applied to all pods in the namespace. + // +optional + PodSelector metav1.LabelSelector `json:"podSelector,omitempty"` + + // priority is a value from 0 to 100 and represents the NetworkQoS' priority. + // QoSes with numerically higher priority takes precedence over those with lower. + // +kubebuilder:validation:Maximum:=100 + // +kubebuilder:validation:Minimum:=0 + Priority int `json:"priority"` + + // egress a collection of Egress NetworkQoS rule objects. A total of 20 rules will + // be allowed in each NetworkQoS instance. The relative precedence of egress rules + // within a single NetworkQos object (all of which share the priority) will be + // determined by the order in which the rule is written. Thus, a rule that appears + // first in the list of egress rules would take the lower precedence. + // +kubebuilder:validation:MaxItems=20 + Egress []Rule `json:"egress"` } type Rule struct { -// dscp marking value for matching pods' traffic. -// +kubebuilder:validation:Maximum:=63 -// +kubebuilder:validation:Minimum:=0 -DSCP int `json:"dscp"` - -// classifier The classifier on which packets should match -// to apply the NetworkQoS Rule. -// This field is optional, and in case it is not set the rule is applied -// to all egress traffic regardless of the destination. -// +optional -Classifier Classifier `json:"classifier"` - -// +optional -Bandwidth Bandwidth `json:"bandwidth"` + // dscp marking value for matching pods' traffic. + // +kubebuilder:validation:Maximum:=63 + // +kubebuilder:validation:Minimum:=0 + DSCP int `json:"dscp"` + + // classifier The classifier on which packets should match + // to apply the NetworkQoS Rule. + // This field is optional, and in case it is not set the rule is applied + // to all egress traffic regardless of the destination. + // +optional + Classifier Classifier `json:"classifier"` + + // +optional + Bandwidth Bandwidth `json:"bandwidth"` } type Classifier struct { -// +optional -To []Destination `json:"to"` + // +optional + To []Destination `json:"to"` -// +optional -Port Port `json:"port"` + // +optional + Ports []*Port `json:"ports"` } // Bandwidth controls the maximum of rate traffic that can be sent // or received on the matching packets. type Bandwidth struct { -// rate The value of rate limit in kbps. Traffic over the limit -// will be dropped. -// +kubebuilder:validation:Minimum:=1 -// +kubebuilder:validation:Maximum:=4294967295 -// +optional -Rate uint32 `json:"rate"` - -// burst The value of burst rate limit in kilobits. -// This also needs rate to be specified. -// +kubebuilder:validation:Minimum:=1 -// +kubebuilder:validation:Maximum:=4294967295 -// +optional -Burst uint32 `json:"burst"` + // rate The value of rate limit in kbps. Traffic over the limit + // will be dropped. 
+ // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=4294967295 + // +optional + Rate uint32 `json:"rate"` + + // burst The value of burst rate limit in kilobits. + // This also needs rate to be specified. + // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=4294967295 + // +optional + Burst uint32 `json:"burst"` } // Port specifies destination protocol and port on which NetworkQoS // rule is applied type Port struct { -// protocol (tcp, udp, sctp) that the traffic must match. -// +kubebuilder:validation:Pattern=^TCP|UDP|SCTP$ -// +optional -Protocol string `json:"protocol"` - -// port that the traffic must match -// +kubebuilder:validation:Minimum:=1 -// +kubebuilder:validation:Maximum:=65535 -// +optional -Port int32 `json:"port"` + // protocol (tcp, udp, sctp) that the traffic must match. + // +kubebuilder:validation:Pattern=^TCP|UDP|SCTP$ + // +optional + Protocol string `json:"protocol"` + + // port that the traffic must match + // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=65535 + // +optional + Port *int32 `json:"port"` } // Destination describes a peer to apply NetworkQoS configuration for the outgoing traffic. // Only certain combinations of fields are allowed. // +kubebuilder:validation:XValidation:rule="!(has(self.ipBlock) && (has(self.podSelector) || has(self.namespaceSelector)))",message="Can't specify both podSelector/namespaceSelector and ipBlock" type Destination struct { -// podSelector is a label selector which selects pods. This field follows standard label -// selector semantics; if present but empty, it selects all pods. -// -// If namespaceSelector is also set, then the NetworkQoS as a whole selects -// the pods matching podSelector in the Namespaces selected by NamespaceSelector. -// Otherwise it selects the pods matching podSelector in the NetworkQoS's own namespace. -// +optional -PodSelector *metav1.LabelSelector `json:"podSelector,omitempty" protobuf:"bytes,1,opt,name=podSelector"` - -// namespaceSelector selects namespaces using cluster-scoped labels. This field follows -// standard label selector semantics; if present but empty, it selects all namespaces. -// -// If podSelector is also set, then the NetworkQoS as a whole selects -// the pods matching podSelector in the namespaces selected by namespaceSelector. -// Otherwise it selects all pods in the namespaces selected by namespaceSelector. -// +optional -NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty" protobuf:"bytes,2,opt,name=namespaceSelector"` - -// ipBlock defines policy on a particular IPBlock. If this field is set then -// neither of the other fields can be. -// +optional -IPBlock *networkingv1.IPBlock `json:"ipBlock,omitempty" protobuf:"bytes,3,rep,name=ipBlock"` + // podSelector is a label selector which selects pods. This field follows standard label + // selector semantics; if present but empty, it selects all pods. + // + // If namespaceSelector is also set, then the NetworkQoS as a whole selects + // the pods matching podSelector in the Namespaces selected by NamespaceSelector. + // Otherwise it selects the pods matching podSelector in the NetworkQoS's own namespace. + // +optional + PodSelector *metav1.LabelSelector `json:"podSelector,omitempty" protobuf:"bytes,1,opt,name=podSelector"` + + // namespaceSelector selects namespaces using cluster-scoped labels. This field follows + // standard label selector semantics; if present but empty, it selects all namespaces. 
+ // + // If podSelector is also set, then the NetworkQoS as a whole selects + // the pods matching podSelector in the namespaces selected by namespaceSelector. + // Otherwise it selects all pods in the namespaces selected by namespaceSelector. + // +optional + NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty" protobuf:"bytes,2,opt,name=namespaceSelector"` + + // ipBlock defines policy on a particular IPBlock. If this field is set then + // neither of the other fields can be. + // +optional + IPBlock *networkingv1.IPBlock `json:"ipBlock,omitempty" protobuf:"bytes,3,rep,name=ipBlock"` } // Status defines the observed state of NetworkQoS type Status struct { -// A concise indication of whether the NetworkQoS resource is applied with success. -// +optional -Status string `json:"status,omitempty"` - -// An array of condition objects indicating details about status of NetworkQoS object. -// +optional -// +patchMergeKey=type -// +patchStrategy=merge -// +listType=map -// +listMapKey=type -Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` + // A concise indication of whether the NetworkQoS resource is applied with success. + // +optional + Status string `json:"status,omitempty"` + + // An array of condition objects indicating details about status of NetworkQoS object. + // +optional + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -317,9 +318,9 @@ Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" // +kubebuilder::singular=networkqos // NetworkQoSList contains a list of NetworkQoS type NetworkQoSList struct { -metav1.TypeMeta `json:",inline"` -metav1.ListMeta `json:"metadata,omitempty"` -Items []NetworkQoS `json:"items"` + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []NetworkQoS `json:"items"` } ``` @@ -380,12 +381,12 @@ spec: - dscp: 11 classifier: to: - - ipBlock: - cidr: 0.0.0.0/0 - except: - - 10.0.0.0/8 - - 172.16.0.0/12 - - 192.168.0.0/16 + - ipBlock: + cidr: 0.0.0.0/0 + except: + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 ``` the equivalent of: @@ -456,10 +457,14 @@ metadata: name: qos-external-free namespace: games spec: - netAttachRefs: - - kind: NetworkAttachmentDefinition - namespace: default - name: ovn-storage + networkSelectors: + - networkSelectionType: NetworkAttachmentDefinitions + networkAttachmentDefinitionSelector: + namespaceSelector: + matchLabels: {} # Empty selector will select all namespaces + networkSelector: + matchLabels: + name: ovn-storage priority: 2 egress: - dscp: 11 @@ -467,6 +472,11 @@ spec: to: - ipBlock: cidr: 0.0.0.0/0 + ports: + - protocol: TCP + port: 80 + - protocol: TCP + port: 443 ``` This creates a new AddressSet adding default namespace pod(s) IP associated with ovn-storage diff --git a/docs/okeps/okep-5193-user-defined-networks.md b/docs/okeps/okep-5193-user-defined-networks.md new file mode 100644 index 0000000000..93da13c51c --- /dev/null +++ b/docs/okeps/okep-5193-user-defined-networks.md @@ -0,0 +1,1486 @@ +# OKEP-5193: User Defined Network Segmentation + +* Issue: [#5193](https://github.com/ovn-org/ovn-kubernetes/issues/5193) + +## Problem Statement + +OVN-Kubernetes today allows multiple different types of networks per secondary network: layer 2, layer 3, or localnet. 
+Pods can be connected to different networks without discretion. For the primary network, OVN-Kubernetes only supports all +pods connecting to the same layer 3 virtual topology. The scope of this effort is to bring the same flexibility of the +secondary network to the primary network. Therefore, pods are able to connect to different types of networks as their +primary network. + +Additionally, multiple and different instances of primary networks may co-exist for different users, and they will provide +native network isolation. + +## Terminology + +* **Primary Network** - The network which is used as the default gateway for the pod. Typically recognized as the eth0 + interface in the pod. +* **Secondary Network** - An additional network and interface presented to the pod. Typically created as an additional + Network Attachment Definition (NAD), leveraging Multus. Secondary Network in the context of this document refers to a + secondary network provided by the OVN-Kubernetes CNI. +* **Cluster Default Network** - This is the routed OVN network that pods attach to by default today as their primary network. + The pods default route, service access, as well as kubelet probe are all served by the interface (typically eth0) on this network. +* **User-Defined Network** - A network that may be primary or secondary, but is declared by the user. +* **Layer 2 Type Network** - An OVN-Kubernetes topology rendered into OVN where pods all connect to the same distributed + logical switch (layer 2 segment) which spans all nodes. Uses Geneve overlay. +* **Layer 3 Type Network** - An OVN-Kubernetes topology rendered into OVN where pods have a per-node logical switch and subnet. + Routing is used for pod to pod communication across nodes. This is the network type used by the cluster default network today. + Uses Geneve overlay. +* **Localnet Type Network** - An OVN-Kubernetes topology rendered into OVN where pods connect to a per-node logical switch + that is directly wired to the underlay. + +## Goals + +* Provide a configurable way to indicate that a pod should be connected to a user-defined network of a specific type as a + primary interface. +* The primary network may be configured as a layer 3 or layer 2 type network. +* Allow networks to have overlapping pod IP address space. This range may not overlap with the default cluster subnet + used for allocating pod IPs on the cluster default network today. +* The cluster default primary network defined today will remain in place as the default network pods attach to. The cluster + default network will continue to serve as the primary network for pods in a namespace that has no primary user-defined network. Pods + with primary user-defined networks will still attach to the cluster default network with limited access to Kubernetes system resources. + Pods with primary user-defined networks will have at least two network interfaces, one connected to the cluster default network and one + connected to the user-defined network. Pods with primary user-defined networks will use the user-defined network as their default + gateway. +* Allow multiple namespaces per network. +* Support cluster ingress/egress traffic for user-defined networks, including secondary networks. +* Support for ingress/egress features on user-defined primary networks where possible: + * QoS + * EgressIP + * Load Balancer and NodePort Services, as well as services with External IPs. 
+* In addition to ingress service support, there will be support for Kubernetes Cluster IP services in user-defined networks. The + scope of reachability to that service as well as endpoints selected for that service will be confined to the network + and corresponding namespace(s) where that service was created. +* Support for pods to continue to have access to the cluster default primary network for DNS and KAPI service access. +* Kubelet healthchecks/probes will still work on all pods. + +## Non-Goals + +* Allowing different service CIDRs to be used in different networks. +* Localnet will not be supported initially for primary networks. +* Allowing multiple primary networks per namespace. +* Hybrid overlay support on user-defined networks. + +## Future-Goals + +* DNS lookup for pods returning records for IPs on the user-defined network. In the first phase DNS will return the pod + IP on the cluster default network instead. +* Admin ability to configure networks to have access to all services and/or expose services to be accessible from all + networks. +* Ability to advertise user-defined networks to external networks using BGP/EVPN. This will enable things like: + * External -> Pod ingress per VRF (Ingress directly to pod IP) + * Multiple External Gateway (MEG) in a BGP context, with ECMP routes +* Allow connection of multiple networks via explicit router API configuration. +* An API to allow user-defined ports for pods to be exposed on the cluster default network. This may be used for things + like Prometheus metric scraping. +* Potentially, coming up with an alternative solution for requiring the cluster default network connectivity to the pod, + and presenting the IP of the pod to Kubernetes as the user-defined primary network IP, rather than the cluster default + network IP. +* Support for Egress Service +* Support for Host Networked Pods -> UDN pods or UDN services + +## Introduction + +As users migrate from OpenStack to Kubernetes, there is a need to provide network parity for those users. In OpenStack, +each tenant (akin to Kubernetes namespace) by default has a layer 2 network, which is isolated from any other tenant. +Connectivity to other networks must be specified explicitly as network configuration via a Neutron router. In Kubernetes +the paradigm is the opposite: by default all pods can reach other pods, and security is provided by implementing Network Policy. +Network Policy can be cumbersome to configure and manage for a large cluster. It also can be limiting as it only matches +TCP, UDP, and SCTP traffic. Furthermore, large amounts of network policy can cause performance issues in CNIs. With all +these factors considered, there is a clear need to address network security in a native fashion, by using networks per +tenant to isolate traffic. + +## User-Stories/Use-Cases + +* As a user, I want to be able to migrate applications traditionally on OpenStack to Kubernetes, keeping my tenant network + space isolated and having the ability to use a layer 2 network. +* As a user, I want to be able to ensure network security between my namespaces without having to manage and configure + complex network policy rules. +* As an administrator, I want to be able to provision networks to my tenants to ensure their networks and applications + are natively isolated from other tenants. +* As a user, I want to be able to request a unique, primary network for my namespace without having to get administrator + permission.
+* As a user, I want to be able to request new secondary networks for my namespace, without having to get administrator + permission. +* As a user, I want user-defined primary networks to be able to have similar functionality as the cluster default network, + regardless of being on a layer 2 or layer 3 type network. Features like Egress IP, Egress QoS, Kubernetes services, + Ingress, and pod Egress should all function as they do today in the cluster default network. +* As a user, I want to be able to use my own consistent IP addressing scheme in my network. I want to be able to specify + and re-use the same IP subnet for my pods across different namespaces and clusters. This provides a consistent + and repeatable network environment for administrators and users. + +## Proposed Solution + +By default, in OVN-Kubernetes pods are attached to what is known as the "cluster default" network, which is a routed network +divided up into a subnet per node. All pods will continue to have an attachment to this network, even when assigned a +different primary network. Therefore, when a pod is assigned to a user-defined network, it will have two interfaces, one +to the cluster default network, and one to the user-defined network. The cluster default network attachment is required +in order to provide support for Kubelet healthcheck probes to the pod. + +All other traffic from the pod will be dropped by firewall rules on this network, when the pod is assigned a user-defined +primary network. Routes will be added to the user-defined OVN network to route KAPI/DNS traffic out towards the cluster +default network. Note, it may be desired to allow access to any Kubernetes service on the cluster default network (instead of just KAPI/DNS), +but at a minimum KAPI/DNS will be accessible. Furthermore, the IP of the pod from the Kubernetes API will continue to +show the IP assigned in the cluster default network. + +In OVN-Kubernetes secondary networks are defined using Network Attachment Definitions (NADs). For more information on +how these are configured, refer to: + +[https://github.com/ovn-org/ovn-kubernetes/blob/master/docs/features/multi-homing.md](https://github.com/ovn-org/ovn-kubernetes/blob/master/docs/features/multi-homing.md) + +The proposal here is to leverage this existing mechanism to create the network. A new field, "role", is +introduced to the NAD spec which indicates that this network should be used for the pod's primary network. Additionally, +a new "joinSubnets" field is added in order to specify the join subnet used inside the OVN network topology. An +example OVN-Kubernetes NAD may look like: + +``` +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: l3-network + namespace: default +spec: + config: |2 + { + "cniVersion": "0.3.1", + "name": "l3-network", + "type": "ovn-k8s-cni-overlay", + "topology":"layer3", + "subnets": "10.128.0.0/16/24,2600:db8::/29", + "joinSubnets": "100.65.0.0/24,fd99::/64", + "mtu": 1400, + "netAttachDefName": "default/l3-network", + "role": "primary" + } +``` + +The NAD must be created before any pods are created for this namespace. In order to enforce this requirement, a required +label will need to be added to a namespace during namespace creation time to indicate this namespace will have a primary +UDN. The required label will be "k8s.ovn.org/primary-user-defined-network".
It is recommended to use an admission policy +so that a namespace cannot be updated to add/remove this label, and that it must be added only at namespace creation time: + +```yaml +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: user-defined-networks-namespace-label +spec: + matchConstraints: + resourceRules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["UPDATE"] + resources: ["namespaces"] + failurePolicy: Fail + validations: + - expression: "('k8s.ovn.org/primary-user-defined-network' in oldObject.metadata.labels) == ('k8s.ovn.org/primary-user-defined-network' in object.metadata.labels)" + message: "The 'k8s.ovn.org/primary-user-defined-network' label cannot be added/removed after the namespace was created" + +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: user-defined-networks-namespace-label-binding +spec: + policyName: user-defined-networks-namespace-label + validationActions: [Deny] + matchResources: + resourceRules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["UPDATE"] + resources: ["namespaces"] +``` + +The following conditions regarding the namespace label are: + +1. If namespace is missing the label, and a pod is created, it attaches to default network. +2. If the namespace is missing the label, and a primary UDN or CUDN is created that matches that namespace, the UDN/CUDN +will report error status and the NAD will not be generated. +3. If the namespace is missing the label, and a primary UDN/CUDN exists, a pod in the namespace will be created and +attached to default network. +4. If the namespace has the label, and a primary UDN/CUDN does not exist a pod in the namespace will fail creation until +the UDN/CUDN is created. + +Only one primary network may exist per namespace. If more than one user-defined network is created with the +"role" key set to primary, then future pod creations will return an error on CNI ADD until the network +configuration is corrected. + +A pod may not connect to multiple primary networks other than the cluster default. When the NAD is created, +OVN-Kubernetes will validate the configuration, as well as that no pods have been created in the namespace already. If +pods existed before the NAD was created, errors will be logged, and no further pods will be created in this namespace +until the network configuration is fixed. + +After creating the NAD, pods created in this namespace will connect to the newly defined network as their primary +network. The primaryNetwork key is used so that OVN-Kubernetes knows which network should be used, in case there are multiple +NADs created for a namespace (secondary networks). 
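+
+To make the namespace label requirement described above concrete, the following is a minimal sketch of a namespace that carries the required label at creation time. The namespace name is illustrative, and the empty label value assumes that only the presence of the label key is significant:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: udn-test
+  labels:
+    k8s.ovn.org/primary-user-defined-network: ""
+```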
+ +After a pod is created that shall connect to a user-defined network, it will then be annotated by OVN-Kubernetes with the +appropriate networking config: + +``` +trozet@fedora:~/Downloads$ oc get pods -o yaml -n ns1 +apiVersion: v1 +items: +- apiVersion: v1 + kind: Pod + metadata: + annotations: + k8s.ovn.org/pod-networks: | + { + "default": { + "ip_addresses": ["10.244.0.3/24"], + "mac_address": "0a:58:0a:f4:00:03", + "routes": [ + {"dest": "10.244.0.0/16", "nextHop": "10.244.0.1"}, + {"dest": "100.64.0.0/16", "nextHop": "10.244.0.1"} + ], + "ip_address": "10.244.0.3/24", + "role": "infrastructure-locked" + }, + "udn-test/l3-primary": { + "ip_addresses": ["10.20.2.5/24"], + "mac_address": "0a:58:0a:14:02:05", + "gateway_ips": ["10.20.2.1"], + "routes": [ + {"dest": "10.20.0.0/16", "nextHop": "10.20.2.1"}, + {"dest": "10.96.0.0/16", "nextHop": "10.20.2.1"}, + {"dest": "100.65.0.0/16", "nextHop": "10.20.2.1"} + ], + "ip_address": "10.20.2.5/24", + "gateway_ip": "10.20.2.1", + "role": "primary" + } + } + k8s.v1.cni.cncf.io/network-status: |- + [{ + "name": "ovn-kubernetes", + "interface": "eth0", + "ips": [ + "10.244.0.3" + ], + "mac": "0a:58:0a:f4:00:03", + "dns": {} + },{ + "name": "ovn-kubernetes", + "interface": "ovn-udn1", + "ips": [ + "10.20.2.5" + ], + "mac": "0a:58:0a:14:02:05", + "default": true, + "dns": {} + }] + creationTimestamp: "2025-04-22T18:50:50Z" + labels: + pod-name: client + name: client + namespace: udn-test + resourceVersion: "2093" + uid: dca1ff46-1990-4e84-a0a5-7c9fdde01993 +status: + podIP: 10.244.0.3 + podIPs: + - ip: 10.244.0.3 +``` + +In the above output the primary network is listed within the k8s.ovn.org/pod-networks annotation. It is also listed in +the network-status cncf annotation as "ovn-udn1". A user does not have to manually request that the pod is attached +to the primary network. The attachment to the cluster default network (CDN) and the primary UDN are done within the same +CNI ADD call. + +Multiple namespaces may also be configured to use the same network. In this case the underlying OVN network will be the +same, following a similar pattern to what is +[already supported today for secondary networks](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/multiple-networks/multi-homing.md). + +### IP Addressing + +As previously mentioned, one of the goals is to allow user-defined networks to have overlapping pod IP addresses. This +is enabled by allowing a user to configure what CIDR to use for pod addressing when they create the network. However, +this range cannot overlap with the default cluster CIDR used by the cluster default network today. + +Furthermore, the internal masquerade subnet and the Kubernetes service subnet will remain unique and will exist globally +to serve all networks. The masquerade subnet must be large enough to accommodate enough networks. Therefore, the +subnet size of the masquerade subnet is equal to the number of desired networks * 2, as we need 2 masquerade IPs per +network. The masquerade subnet remains localized to each node, so each node can use the same IP addresses and the size +of the subnet does not scale with number of nodes. + +The transit switch subnets may overlap between all networks. This network is just used for transport between nodes, and +is never seen by the pods or external clients. + +The join subnet of the default cluster network may not overlap with the join subnet of user-defined networks. 
This is +due to the fact that the pod is connected to the default network, as well as the user-defined primary network. The join +subnet is SNAT'ed by the GR of that network in order to facilitate ingress reply service traffic going back to the +proper GR, in case it traverses the overlay. For this reason, the pods may see this IP address and routes are added to +the pod to steer the traffic to the right interface (100.64.0.0/16 is the default cluster network join subnet): + +``` +[root@pod3 /]# ip route show +default via 10.244.1.1 dev eth0 +10.96.0.0/16 via 10.244.1.1 dev eth0 +10.244.0.0/16 via 10.244.1.1 dev eth0 +10.244.1.0/24 dev eth0 proto kernel scope link src 10.244.1.8 +100.64.0.0/16 via 10.244.1.1 dev eth0 +``` + +Since the pod needs routes for each join subnet, any layer 3 or layer 2 network that is attached to the pod needs a unique +join subnet. Consider a pod connected to the default cluster network, a user-defined, layer 3, primary network, and a +layer 2, secondary network: + +| Network | Pod Subnet | Node Pod Subnet | Join Subnet | +|-----------------|---------------|-----------------|---------------| +| Cluster Default | 10.244.0.0/16 | 10.244.0.0/24 | 100.64.0.0/16 | +| Layer 3 | 10.245.0.0/16 | 10.245.0.0/24 | 100.65.0.0/16 | +| Layer 2 | 10.246.0.0/16 | N/A | 100.66.0.0/16 | + + +The routing table would look like: + +``` +[root@pod3 /]# ip route show +default via 10.245.0.1 dev eth1 +10.96.0.0/16 via 10.245.0.1 dev eth1 +10.244.0.0/16 via 10.244.0.1 dev eth0 +10.245.0.0/16 via 10.245.0.1 dev eth1 +10.244.0.0/24 dev eth0 proto kernel scope link src 10.244.0.8 +10.245.0.0/24 dev eth1 proto kernel scope link src 10.245.0.8 +10.246.0.0/16 dev eth2 proto kernel scope link src 10.246.0.8 +100.64.0.0/16 via 10.244.0.1 dev eth0 +100.65.0.0/16 via 10.245.0.1 dev eth1 +100.66.0.0/16 via 10.246.0.1 dev eth2 +``` + +Therefore, when specifying a user-defined network it will be imperative to ensure that the networks a pod will connect to +do not have overlapping pod network or join network subnets. OVN-Kubernetes should be able to detect this scenario and +refuse to CNI ADD a pod with conflicts. + +### DNS + +DNS lookups will happen via every pod’s access to the DNS service on the cluster default network. CoreDNS lookups for +pods will resolve to the pod’s IP on the cluster default network. This is a limitation of the first phase of this feature +and will be addressed in a future enhancement. DNS lookups for services and external entities will function correctly. + +### Services + +Services in Kubernetes are namespace scoped. Any creation of a service in a namespace without a user-defined network +(using cluster default network as primary) will only be accessible by other namespaces also using the default network as +their primary network. Services created in namespaces served by user-defined networks, will only be accessible to +namespaces connected to the user-defined network. + +Since most applications require DNS and KAPI access, there is an exception to the above conditions where pods that are +connected to user-defined networks are still able to access KAPI and DNS services that reside on the cluster default +network. In the future, access to more services on the default network may be granted. However, that would require more +groundwork around enforcing network policy (which is evaluated typically after service DNAT) as potentially nftables +rules. Such work is considered a future enhancement and beyond the scope of this initial implementation. 
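+
+For illustration, a ClusterIP Service in a namespace served by a user-defined primary network is declared like any other Service; the names below are hypothetical, and the selector reuses the pod label from the earlier pod example:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: client-svc
+  namespace: udn-test
+spec:
+  selector:
+    pod-name: client      # selects pods in this namespace only
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: 8080
+```
+
+Such a Service is only reachable from namespaces attached to the same user-defined network, as described in the rest of this section.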
+ +With this proposal, OVN-Kubernetes will check which network is being used for this namespace, and then only enable the +service there. The cluster IP of the service will only be available in the network of that service, except for KAPI and +DNS as previously explained. Host networked pods in a namespace with a user-defined primary network will also be limited +to only accessing the cluster IP of the services for that network. Load balancer IP and nodeport services are also +supported on user-defined networks. Service selectors are only able to select endpoints from the same namespace where the +service exists. Services that exist before the user-defined network is assigned to a namespace will result in +OVN-Kubernetes executing a re-sync on all services in that namespace, and updating all load balancers. Keep in mind that +pods must not exist in the namespace when the namespace is assigned to a new network or the new network assignment will +not be accepted by OVN-Kubernetes. + +Services in a user-defined network will be reachable by other namespaces that share the same network. + +As previously mentioned, Kubernetes API and DNS services will be accessible by all pods. + +Endpoint slices will provide the IPs of the cluster default network in Kubernetes API. For this implementation the required +endpoints are those IP addresses which reside on the user-defined primary network. In order to solve this problem, +OVN-Kubernetes may create its own endpoint slices or may choose to do dynamic lookups at runtime to map endpoints to +their primary IP address. Leveraging a second set of endpoint slices will be the preferred method, as it creates less +indirection and gives explicit Kube API access to what IP addresses are being used by OVN-Kubernetes. Read more about +the [endpoint slice mirroring implementation](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/multiple-networks/mirrored-endpointslices.md). + +Kubelet health checks to pods are queried via the cluster default network. When endpoints are considered unhealthy they +will be removed from the endpoint slice, and thus their primary IP will be removed from the OVN load balancer. However, +it is important to note that the healthcheck is being performed via the cluster default network interface on the pod, +which ensures the application is alive, but does not confirm network connectivity of the primary interface. Therefore, +there could be a situation where OVN networking on the primary interface is broken, but the default interface continues +to work and reports 200 OK to Kubelet, thus rendering the pod serving in the endpoint slice, but unable to function. +Although this is an unlikely scenario, it is good to document. + +### Network Policy + +Network Policy will be fully supported for user-defined primary networks as it is today with the cluster default network. +However, configuring network policies that allow traffic between namespaces that connect to different user-defined +primary networks will have no effect. This traffic will not be allowed, as the networks have no connectivity to each other. +These types of policies will not be invalidated by OVN-Kubernetes, but the configuration will have no effect. Namespaces +that share the same user-defined primary network will still benefit from network policy that applies access control over +a shared network. Additionally, policies that block/allow cluster egress or ingress traffic will still be enforced for +any user-defined primary network. 
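+
+For example (namespace names are illustrative), the following policy only has a practical effect when tenant-a and tenant-b share the same user-defined primary network; if they are attached to different primary networks, the traffic is not possible regardless of policy:
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-from-tenant-b
+  namespace: tenant-a
+spec:
+  podSelector: {}          # applies to all pods in tenant-a
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchLabels:
+          kubernetes.io/metadata.name: tenant-b
+```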
+ +### API Details + +Network Attachment Definitions (NADs) are the current way to configure the network in OVN-Kubernetes today, and the +method proposed in this enhancement. There are two major shortcomings of NADs: + +1. It has free-form configuration that depends on the CNI. There is no API validation of what a user enters, leading to + mistakes which are not caught at configuration time and may cause unexpected functional behavior at runtime. +2. It requires cluster admin RBAC in order to create the NAD. + +In order to address these issues, a proper CRD may be implemented which indirectly creates the NAD for OVN-Kubernetes. +This solution may consist of more than one CRD, namely an Admin based CRD and one that is namespace scoped for tenants. +The reasoning behind this is that we want tenants to be able to create their own user-defined network for their namespace, +but we do not want them to be able to connect to another namespace's network without permission. The Admin based version +would give higher level access and allow an administrator to create a network that multiple namespaces could connect to. +It may also expose more settings in the future for networks that would not be safe in the hands of a tenant, like +deciding if a network is able to reach other services in other networks. With tenants having access to be able to create +multiple networks, we need to consider potential attack vectors like a tenant trying to exhaust OVN-Kubernetes +resources by creating too many secondary networks. + +Furthermore, by utilizing a CRD, the status of the network CR itself can be used to indicate whether it is configured +by OVN-Kubernetes. For example, if a user creates a network CR and there is some problem (like pods already existed) then +an error status can be reported to the CR, rather than relying on the user to check OVN-Kubernetes logs. + +#### API Overview + +Two CRDs shall be introduced. Note for the final implementation, see the [official api](https://ovn-kubernetes.io/api-reference/userdefinednetwork-api-spec/). +- Namespace scoped CRD - represents a user request for creating a namespace scoped OVN network. + - Shall be defined with `namespace` scope. + - Targeted for cluster admin and non-admin users, enabling creation of an OVN network in a specific namespace. +- Cluster scoped CRD - represents a user request for creating a cluster scoped OVN network, enabling cross-namespace networking. + - Shall be defined with `cluster` scope. + - Targeted for cluster admin users only, enabling creation of a shared OVN network across multiple namespaces. + +Having a namespace-scoped CRD targeted at admin and non-admin users, a cluster-scoped CRD for admins only, and utilizing the RBAC +mechanism allows non-admin users to create OVN networks in namespaces they are permitted to use, with no admin intervention and +without the risk of destabilizing the cluster nodes or breaking the cluster network. + +There should be a finalizer on the CRDs, so that upon deletion OVN-Kubernetes can validate that there are no pods still using this network. +If there are pods still attached to this network, the network will not be removed.
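+
+For orientation, a namespace scoped network request could look roughly like the sketch below, which simply mirrors the fields in the CRD spec table of the next section. The apiVersion and the exact field layout shown here are assumptions; the authoritative schema is the official API linked above:
+
+```yaml
+# Sketch only: field names follow the spec table below; apiVersion is assumed.
+apiVersion: k8s.ovn.org/v1
+kind: UserDefinedNetwork
+metadata:
+  name: tenant-net
+  namespace: tenant-a
+spec:
+  topology: Layer2
+  role: Primary
+  subnets:
+  - 10.100.200.0/24
+  joinSubnets:
+  - 100.65.0.0/24
+```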
+ +#### CRD Spec + +The CRDs spec defines as follows: + +| Field name | Description | optional | +|----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| Topology | The topological configuration for the network. Must be one of `Layer2`, `layer3`, `Localnet`. | No | +| Role | Select the network role in the pod, either `Primary` or `Secondary`. Primary network support topologies `Layer2` and `Layer3` only. | No | +| MTU | The maximum transmission unit (MTU).
The default is 1400. | Yes | +| Subnets | The subnet to use for the network across the cluster.
E.g. `10.100.200.0/24`.
IPv6 `2001:DBB::/64` and dual-stack `192.168.100.0/24`,`2001:DBB::/64` subnets are supported.
When omitted, the logical switch implementing the network only provides layer 2 communication, and users must configure IP addresses.
In that case, port security only prevents MAC spoofing. | Yes | +| ExcludeSubnets | List of CIDRs.
IP addresses are removed from the assignable IP address pool and are never passed to the pods. | Yes | +| JoinSubnets | Subnet used inside the OVN network topology. When omitted, this means no opinion and the platform is left to choose a reasonable default which is subject to change over time. | Yes | +| IPAM.Lifecycle | Control IP addresses management lifecycle. When `Persistent` is specified it enable workloads have persistent IP addresses. For example: Virtual Machines will have the same IP addresses along their lifecycle (stop, start migration, reboots). Supported by Topology `Layer2` & `Localnet`. | Yes | +| IPAM.Mode | Control how much of the IP configuration will be managed by OVN-Kubernetes. Must be one of `Enabled`, `Disabled`. | Yes | + +The cluster scoped CRD should have the following additional field: + +| Field name | Description | optional | +|-------------------|---------------------------------------------------------------------------------------------------------------|----------| +| NamespaceSelector | List of the standard `metav1.LabelSelector` selector for which namespace the network should be available for. | No | +| Template | The user defined network spec. | No | + +The template type should be the namespace scope CRD spec. + +> **Note:** The spec should be extended with care and strive to have minimal set of fields to provide nice abstraction for the NAD spec. + +#### CRD Status +The CRD status should reflect the NAD state through conditions. +For example, when the NAD its created the condition should be placed with status `True`. + +For cluster scoped networks, the condition should be true once all desired namespaces are provisioned with the corresponding NAD. + +The cluster scoped CRD status should reflect on which namespaces the network is available. + +#### Namespace scoped CRD +```golang +// UserDefinedNetwork describe network request for a Namespace +type UserDefinedNetwork struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + // +kubebuilder:validation:Required + // +kubebuilder:validation:XValidation:rule="self == oldSelf", message="Spec is immutable" + // +kubebuilder:validation:XValidation:rule=" self.topology != 'Layer3' || (self.topology == 'Layer3' && size(self.subnets) > 0)", message="Subnets is required for Layer3 topology" + // +kubebuilder:validation:XValidation:rule="self.topology != 'Localnet' || (self.topology == 'Localnet' && self.role == 'Secondary')", message="Topology localnet is not supported for primary network" + // +kubebuilder:validation:XValidation:rule="!has(self.ipamLifecycle) || (self.ipamLifecycle == 'Persistent' && (self.topology == 'Layer2' || self.topology == 'Localnet'))", message="ipamLifecycle is supported for Layer2 and Localnet topologies" + // +required + Spec UserDefinedNetworkSpec `json:"spec"` + // +optional + Status UserDefinedNetworkStatus `json:"status,omitempty"` +} +``` +Suggested API validation rules: +- Spec defined as immutable +
+ Avoid incomplete state in a scenario where a NAD is created according to UDN spec, and pods connected to the network, + UDN spec changes, now pods connected to a network that was created from previous revision spec. +- `Subnets` are mandatory for `Layer3` topology. +- `Localnet` topology is not supported for primary network. +- `IPAM.Lifecycle` is supported for `Layer2` and `Localnet` topology. +- `IPAM.Mode` can be set to `Disabled` only on `Layer2` or `Localnet` topologies for `Secondary` networks, where the `Subnets` parameter must be omitted. When set to `Enabled`, the `Subnets` attribute must be defined. + +Suggested CRD short-name: `udn` + +```golang +// UserDefinedNetworkSpec defines the desired state of UserDefinedNetwork. +type UserDefinedNetworkSpec struct { + // The topological configuration for the network. + // +kubebuilder:validation:Enum=Layer2;Layer3;Localnet + Topology NetworkTopology `json:"topology"` + + // The network role in the pod (e.g.: Primary, Secondary). + // +kubebuilder:validation:Enum=Primary;Secondary + Role NetworkRole `json:"role"` + + // The maximum transmission unit (MTU). + // MTU is optional, if not provided the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. + // +optional + MTU uint `json:"mtu,omitempty"` + + + // The subnet to use for the pod network across the cluster. + // + // Dualstack clusters may set 2 subnets (one for each ip family), + // otherwise only 1 subnet is allowed. + // + // When topology is `Layer3`, the given subnet is split into smaller subnets for every node. + // To specify how the subnet should be split, the following format is supported for `Layer3` network: + // `10.128.0.0/16/24`, which means that every host will get a `/24` subnet. + // If host subnet mask is not set (for example, `10.128.0.0/16`), it will be assigned automatically. + // + // For `Layer2` and `Localnet` topology types, the format should match standard CIDR notation, without + // providing any host subnet mask. + // This field is required when `ipam.mode` is set to `Enabled` and is ignored otherwise. + // +optional + Subnets []string `json:"subnets,omitempty"` + + // A list of CIDRs. + // IP addresses are removed from the assignable IP address pool and are never passed to the pods. + // +optional + ExcludeSubnets []string `json:"excludeSubnets,omitempty"` + + // Subnet used inside the OVN network topology. + // This field is ignored for non-primary networks (e.g.: Role Secondary). + // When omitted, this means no opinion and the platform is left to choose a reasonable default which is subject to change over time. + // +kubebuilder:validation:XValidation:rule="1 <= size(self) && size(self) <= 2", message="Unexpected number of join subnets" + // +optional + JoinSubnets []string `json:"joinSubnets,omitempty"` + + // IPAM section contains IPAM-related configuration for the network. + IPAM *IPAMSpec `json:"ipam,omitempty"` +} + +type IPAMSpec struct { + // Mode controls how much of the IP configuration will be managed by OVN. + // `Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs + // from the selected subnet to the individual pods. + // `Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users + // configure IP addresses for the pods. + // `Disabled` is only available for `Layer2` and `Localnet` topologies for Secondary networks. 
+    // By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function
+    // (such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network.
+    // Defaults to `Enabled`.
+    // +optional
+    Mode IPAMMode `json:"mode"`
+
+    // Lifecycle controls IP addresses management lifecycle.
+    //
+    // The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an
+    // `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested.
+    // Only supported when "mode" is `Enabled`.
+    //
+    // +optional
+    Lifecycle NetworkIPAMLifecycle `json:"lifecycle,omitempty"`
+}
+
+```
+Suggested API validation rules:
+- `Topology` and `Role` fields are mandatory.
+- `Topology` can be one of `Layer2`, `Layer3`, `Localnet`.
+- `Role` can be one of `Primary`, `Secondary`.
+- `IPAM.Lifecycle` can be `Persistent`.
+- `IPAM.Mode` can be set to `Disabled` only on `Layer2` or `Localnet` topologies for `Secondary` networks, where the `Subnets` parameter must be omitted. When set to `Enabled`, the `Subnets` attribute must be defined.
+- `JoinSubnets` length can be 1 or 2.
+
+#### Cluster scoped CRD
+```golang
+// ClusterUserDefinedNetwork describes a request for a shared OVN network across namespaces.
+type ClusterUserDefinedNetwork struct {
+    metav1.TypeMeta   `json:",inline"`
+    metav1.ObjectMeta `json:"metadata,omitempty"`
+    // +kubebuilder:validation:Required
+    // +required
+    Spec ClusterUserDefinedNetworkSpec `json:"spec"`
+    // +optional
+    Status ClusterUserDefinedNetworkStatus `json:"status,omitempty"`
+}
+
+// ClusterUserDefinedNetworkSpec defines the desired state of ClusterUserDefinedNetwork.
+type ClusterUserDefinedNetworkSpec struct {
+    // NamespaceSelector is the label selector for the namespaces in which the network should be available.
+    // +kubebuilder:validation:Required
+    // +required
+    NamespaceSelector metav1.LabelSelector `json:"namespaceSelector"`
+
+    // Template is the direct specification of the UserDefinedNetwork.
+    // +kubebuilder:validation:Required
+    // +required
+    Template *UserDefinedNetworkTemplateSpec `json:"template"`
+}
+
+// UserDefinedNetworkTemplateSpec describes the UserDefinedNetwork spec template.
+type UserDefinedNetworkTemplateSpec struct {
+    // UserDefinedNetworkSpec contains the UserDefinedNetwork specification.
+    Spec UserDefinedNetworkSpec `json:"spec,omitempty"`
+}
+
+// ClusterUserDefinedNetworkStatus contains the observed status of the ClusterUserDefinedNetwork.
+type ClusterUserDefinedNetworkStatus struct {
+    // ActiveNamespaces indicates in which namespaces the network is available.
+    ActiveNamespaces []string `json:"activeNamespaces,omitempty"`
+
+    Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+```
+
+Suggested CRD short-name: `cudn`
+
+#### Example
+Existing conditions reflect whether the Network Attachment Definition has been created, and whether network allocation has been done for every
+node:
+
+```yaml
+status:
+  conditions:
+  - lastTransitionTime: "2025-05-13T18:01:05Z"
+    message: NetworkAttachmentDefinition has been created
+    reason: NetworkAttachmentDefinitionCreated
+    status: "True"
+    type: NetworkCreated
+  - lastTransitionTime: "2025-05-13T18:01:05Z"
+    message: Network allocation succeeded for all synced nodes.
+ reason: NetworkAllocationSucceeded + status: "True" + type: NetworkAllocationSucceeded +``` + +To be implemented condition reflecting the network readiness underlying OVN network state: +```yaml +status: + conditions: + - type: NetworkCreated + status: "True" + reason: OVNNetworkCreated + message: OVN network has been created +``` + +#### Example - Namespace scoped network +```yaml +kind: UserDefinedNetwork +metadata: + name: db-network + namespace: demo +spec: + topology: Layer2 + role: Primary + mtu: 9000 + subnets: ["10.0.0.0/24"] + excludeSubnets: ["10.0.0.0/26"] + ipamLifecycle: Persistent +``` + +After creation: +```yaml +kind: UserDefinedNetwork +metadata: + name: db-network + namespace: demo + finalizers: + - k8s.ovn.org/user-defined-network-protection +spec: + topology: Layer2 + role: Primary + mtu: 9000 + subnets: ["10.0.0.0/24"] + excludeSubnets: ["10.0.0.100/26"] + ipam: + lifecycle: Persistent +status: + conditions: + - lastTransitionTime: "2025-05-13T18:01:05Z" + message: NetworkAttachmentDefinition has been created + reason: NetworkAttachmentDefinitionCreated + status: "True" + type: NetworkCreated + - lastTransitionTime: "2025-05-13T18:01:05Z" + message: Network allocation succeeded for all synced nodes. + reason: NetworkAllocationSucceeded + status: "True" + type: NetworkAllocationSucceeded +``` + +#### Example - Cluster scoped network +```yaml +kind: ClusterUserDefinedNetwork +metadata: + name: db-network +spec: + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: In + values: + - "mynamespace" + - "theirnamespace" + template: + topology: Layer2 + role: Primary + mtu: 9000 + subnets: ["10.0.0.0/24"] + excludeSubnets: ["10.0.0.100/26"] +status: + conditions: + - lastTransitionTime: "2025-05-13T19:26:13Z" + message: 'NetworkAttachmentDefinition has been created in following namespaces: + [mynamespace, theirnamespace]' + reason: NetworkAttachmentDefinitionCreated + status: "True" + type: NetworkCreated +``` + +#### Tenant Use Case + +As a tenant I want to ensure when I create pods in my namespace their network traffic is isolated from other tenants on +the cluster. In order to ensure this, I first create a network CRD that is namespace scoped and indicate: + +- Type of network (Layer 3 or Layer 2) +- IP addressing scheme I wish to use (optional) +- Indicate this network will be the primary network + +After creating this CRD, I can check the status of the CRD to ensure it is actively being used as the primary network +for my namespace by OVN-Kubernetes. Once verified, I can now create pods and they will be in their own isolated SDN. + +#### Admin Use Case + +As an admin, I have a customer who has multiple namespaces and wants to connect them all to the same private network. In +order to accomplish this, I first create an admin network CRD that is cluster scoped and indicate: + +- Type of network (Layer 3 or Layer 2) +- IP addressing scheme I wish to use (optional) +- Indicate this network will be the primary network +- Selector to decide which namespaces may connect to this network. May use the ```kubernetes.io/metadata.name``` label to + guarantee uniqueness and eliminates the ability to falsify access. + +After creating the CRD, check the status to ensure OVN-Kubernetes has accepted this network to serve the namespaces +selected. Now tenants may go ahead and be provisioned their namespace. 
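+
+The admin use case above hinges on the cluster-scoped CR's namespace selector. The sketch below shows how such a
+selector could be evaluated with the standard apimachinery helpers; it is illustrative only (the namespace list and
+function name are made up) and is not the controller's actual code.
+
+```golang
+// cudn_namespace_selector_sketch.go
+//
+// Illustrative evaluation of a ClusterUserDefinedNetwork namespaceSelector.
+package main
+
+import (
+    "fmt"
+
+    corev1 "k8s.io/api/core/v1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/labels"
+)
+
+// selectNamespaces returns the names of the namespaces matched by the selector.
+func selectNamespaces(sel *metav1.LabelSelector, namespaces []corev1.Namespace) ([]string, error) {
+    selector, err := metav1.LabelSelectorAsSelector(sel)
+    if err != nil {
+        return nil, err
+    }
+    var out []string
+    for _, ns := range namespaces {
+        if selector.Matches(labels.Set(ns.Labels)) {
+            out = append(out, ns.Name)
+        }
+    }
+    return out, nil
+}
+
+func main() {
+    sel := &metav1.LabelSelector{
+        MatchExpressions: []metav1.LabelSelectorRequirement{{
+            Key:      "kubernetes.io/metadata.name",
+            Operator: metav1.LabelSelectorOpIn,
+            Values:   []string{"mynamespace", "theirnamespace"},
+        }},
+    }
+    namespaces := []corev1.Namespace{
+        {ObjectMeta: metav1.ObjectMeta{Name: "mynamespace", Labels: map[string]string{"kubernetes.io/metadata.name": "mynamespace"}}},
+        {ObjectMeta: metav1.ObjectMeta{Name: "other", Labels: map[string]string{"kubernetes.io/metadata.name": "other"}}},
+    }
+    matched, err := selectNamespaces(sel, namespaces)
+    fmt.Println(matched, err) // [mynamespace] <nil>
+}
+```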
+
+### Implementation Details
+
+#### CRD Implementation Details
+
+##### Cluster Manager UDN Controller
+
+A new controller should be introduced to manage the lifecycle of the new CRDs.
+
+The controller should utilize the network attachment API and create OVN networks by generating `NetworkAttachmentDefinition`
+objects that use the `ovn-k8s-cni-overlay` CNI plugin, according to the desired spec.
+
+The controller should watch NAD objects in order to reconcile them and reflect the network state in the CRDs' status.
+
+The NADs corresponding to a request should be created with a finalizer, so that upon deletion OVN-Kubernetes can validate that
+there are no pods still using this network.
+If there are pods still attached to this network, the network will not be removed.
+
+The CRD spec should be validated before creating the NAD, see the [validation](#validations) section for more details.
+
+The controller should create the requested NAD with:
+1. A finalizer, enabling the controller to release resources before the NAD is deleted and to ensure no pod is connected to the network.
+2. An owner-reference referring to the request CR object.
+   Using the owner-reference mechanism should prevent deletion of the NAD before the corresponding CRD instance is deleted.
+   In addition, the owner-reference makes teardown seamless: when the request CR object instance is deleted,
+   the cluster garbage collector will dispose of the corresponding NAD once all finalizers are gone.
+
+In a scenario where a NAD already exists at the target namespace, the controller should check whether the existing NAD is managed
+by the controller as follows:
+1. Check that the owner reference matches the request CR UID.
+2. Check that the OVN-K user-defined-network finalizer exists.
+3. Check that the NAD spec corresponds to the desired spec.
+
+In case one of the previous checks fails, the controller should reconcile the request and
+reflect in the CR status that the network is not ready.
+
+For a cluster-scope CRD request, the controller should make a best effort to create the desired NAD in each
+specified namespace.
+In case one or more NAD creations fail (e.g.: because a namespace does not exist), the controller should continue to the
+next namespace.
+When finished, it should reflect in the status all namespaces where NAD creation failed.
+
+##### General CRD flow
+1. On namespace-scope CRD creation:
+   - Validate the CRD spec.
+   - Generate the NAD manifest from the CRD spec.
+   - Check whether the desired NAD already exists. If not, create the NAD and return.
+   - Otherwise, verify the existing NAD corresponds to the desired spec; if so, return.
+   - In case a foreign NAD* already exists at the target namespace, raise an error and return.
+   - In case the NAD is malformed, reconcile it to the desired state.
+   - Update the status as follows:
+     - Reflect the reconciliation errors, which namespaces failed and the reason.
+
+2. On cluster-scope CRD creation:
+   - Validate the CRD spec.
+   - Generate the NAD manifest from the CRD spec.
+   - For each namespace specified in the spec:
+     - Check whether the desired NAD already exists. If not, create the NAD and continue.
+     - Otherwise, verify the existing NAD corresponds to the desired spec; if so, continue.
+     - In case a foreign NAD* already exists at the target namespace, record an error and continue.
+     - In case the NAD is malformed, reconcile it to the desired state.
+   - Update the status as follows:
+     - Reflect the namespaces where the network is available.
+     - Reflect the reconciliation errors, which namespaces failed and the reason.
+
+3. On namespace-scope CRD deletion:
+   - If no NAD exists, return.
+ - In case a NAD exist, ensure no pod specifying the network. + - In case no pod specifying the network, remove the finalizer, allowing the cluster garbage collector dispose the NAD object. + - Otherwise, reflect in the status the network is being deleted because its in use. + +4. On cluster-scope CRD deletion: +- For each namespace specified in the spec: +- In case a NAD exist, ensure no pod specifying the network. +- In case no pod specifying the network, remove the finalizer, allowing the cluster garbage collector dispose the NAD object. +- Otherwise, reflect in the status the network is cannot be deleted because its in use. + +> Note: +> NAD considered foreign when +> - Has the same `meta.namesapce` and `meta.name` as the requested NAD. +> - Has no owner-reference to the request CR object. + +##### NetworkAttachmentDefinition Rendering +The underlying OVN network ID (network-name) is represented by the NAD CNI's network config `name` field, and must be unique. + +The network-name is generated by the controller and should not be exposed for modification. + +Having the network-name unique and non-configurable, avoid the risk where a malicious entity could guess other networks name, +specify then in the spec and tap into other networks. + +The network-name should be composed for the subject CRD `metadata.namespace` and `metadata.name`, in the following format: +`mynamespace.myetwork` + +The `NetworkAttachmentDefinition` object `metadata.namespace` and `metadata.name` should correspond to the request CRD. + +Creating namespace scoped CRD instance should trigger creation of a corresponding NAD at the namespace the CRD instance reside. +Following the [example](#example---namespace-scoped-network), the following NAD should be created: +```yaml +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: db-network + namespace: demo + finalizers: + - k8s.ovn.org/user-defined-network-protection + ownerReferences: + - apiVersion: k8s.ovn.org/v1alpha1 + blockOwnerDeletion: true + kind: UserDefinedNetwork + name: db-network + uid: f45efb13-9511-48c1-95d7-44ee17c949f4 +spec: + config: > + '{ + "cniVersion":"0.3.1", + "mtu":1500, + "name":"demo.network", <--- generated unique network-name + "netAttachDefName":"demo/poc-db-network", + "subnets":"10.0.0.0/24", + "topology":"layer2", + "type":"ovn-k8s-cni-overlay", + "role": "primary", + "persistentIPs": "true" + }' +``` + +For cluster-scoped CRDs, the NAD `metadata.name` should correspond to the request CRD `metadata.name` with an additional prefix. +Having the prefix avoids conflicting with existing NADs who has the same `metadata.name`. +For example: +Given the CR meta.name is `db-network`,the NAD metadata.name will be `cluster.udn.db-network`. + +Creating cluster scoped CRD instance should trigger creation of the corresponding NAD at each namespace specified in the spec. 
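+
+A small sketch of the network-name derivation described above, for both namespace-scoped and cluster-scoped requests.
+The function name, separators and prefix are illustrative (the examples in this document vary slightly); the point is
+that the network-name is derived from the request's metadata, is unique, and is never user-configurable.
+
+```golang
+// network_name_sketch.go
+//
+// Illustrative derivation of the unique OVN network-name that the controller
+// encodes into the generated NAD's CNI config.
+package main
+
+import "fmt"
+
+// networkName builds the OVN network-name for a (Cluster)UserDefinedNetwork
+// request so that requests cannot collide with, or guess, each other's name.
+func networkName(clusterScoped bool, namespace, name string) string {
+    if clusterScoped {
+        // Cluster-scoped networks are namespace-independent; a fixed prefix
+        // keeps them from colliding with namespace-scoped names.
+        return "cluster.udn." + name
+    }
+    return namespace + "." + name
+}
+
+func main() {
+    fmt.Println(networkName(false, "demo", "db-network")) // demo.db-network
+    fmt.Println(networkName(true, "", "db-network"))      // cluster.udn.db-network
+}
+```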
+Following the above cluster-scope CRD [example](#example---cluster-scoped-network), the following NADs should be created: +```yaml +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: cluster-db-network <--- name starts with "cluster" + namespace: mynamespace + finalizers: + - k8s.ovn.org/user-defined-network-protection + ownerReferences: + - apiVersion: k8s.ovn.org/v1alpha1 + blockOwnerDeletion: true + kind: ClusterUserDefinedNetwork + name: db-network + uid: f45efb13-9511-48c1-95d7-44ee17c949f4 +spec: + config: > + '{ + "cniVersion":"0.3.1", + "excludeSubnets":"10.0.0.100/24", + "mtu":1500, + "name":"cluster.udn.db-network", <--- generated unique network-name + "netAttachDefName":"mynamespace/db-network", + "subnets":"10.0.0.0/24", + "topology":"layer2", + "type":"ovn-k8s-cni-overlay", + "role": "primary" + }' +--- +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: cluster-db-network <--- same name as in other nameapces + namespace: theirnamespace + finalizers: + - k8s.ovn.org/user-defined-network-protection + ownerReferences: + - apiVersion: k8s.ovn.org/v1alpha1 + blockOwnerDeletion: true + kind: ClusterUserDefinedNetwork + name: db-network + uid: f45efb13-9511-48c1-95d7-44ee17c949f4 +spec: + config: > + '{ + "cniVersion":"0.3.1", + "excludeSubnets":"10.0.0.100/24", + "mtu":1500, + "name":"cluster.udn.db-network", <--- same name as in other namespaces + "netAttachDefName":"theirnamespace/db-network", + "subnets":"10.0.0.0/24", + "topology":"layer2", + "type":"ovn-k8s-cni-overlay", + "role": "primary" + }' +``` + +##### Validations +The controller should validate the request CRD spec and verify: +- CIDRs are valid. +- `Subnets` length is at least 1 when topology is `Layer3`. +- `Topology` is one of `Layer2`, `Layer3` or `Localnet`. +- `Role` is one of `Primary` or `Secondary`. +- `IPAM.Lifecycle` can be `Persistent`, and set only when topology is `Layer2` or `Localnet`. +- In case `Topology: Localnet`, `Role` cannot be `Primary` + +In addition, the following scenarios should be validated: +- The join subnet shall not overlap with the configured cluster default network join subnet. + If there is overlap, OVN-Kubernetes should report an error in the request CR status. + +- When primary network is requested (i.e.: `spec.role: Primary`) + - Verify no primary network exist at the target namespace (i.e.: no NAD with `"primaryNetwork": "true"` exist). + In case primary-network already exist, the request CR status should reflect network is not ready because primary network + already exist at the target namespace. + +- In a scenario primary network created following CR request, and a primary NAD is created at the same target namespace, + the CR status should reflect there's a conflicting primary NAD. + +- When CRD instance is deleted, ensure the network is not in use before continuing with deletion process (e.g.: remove finalizer). + - Check no pod using the CRD instance corresponding NAD. + - In case at least on pod using the network, update the status to reflect network cannot be deleted because its being used. + +- When a managed NAD already exist at the target namespace: + - In case no owner-reference exist or owner-reference doesn't match the request CR object's UID, + the controller should re-enqueue the request and reflect the error in status saying a foreign NAD exist. + +- When there is existing primary network cluster-scope CR "net1" (`spec.role: Primary`) specifies namespace "A","B","C". 
+  The admin then creates a new primary network cluster-scope CR "net2" (`spec.role: Primary`) that specifies namespaces "C","D","E".
+  - The "net2" network should not become ready in namespace "C"; the status should reflect that the network is not ready because a
+    primary network already exists in namespace "C".
+
+##### Best practices for managing OVN networks using CRDs
+- NADs should be managed by cluster admins only; it is not recommended to grant non-admin users permissions to create/modify/delete NADs.
+- Managing user-defined networks should be done using the suggested CRDs.
+
+  Creating user-defined networks directly with NADs may introduce unexpected behaviour and collisions with the NAD objects the controller manages.
+- Managed NAD objects should not be deleted manually; in order to delete a network, delete the corresponding CR instance.
+- Only one primary network per namespace is supported.
+- Make sure no workloads exist in a namespace before creating a primary network in that namespace.
+- For the cluster-scoped CRD, it is recommended to use the `kubernetes.io/metadata.name` label to specify the target namespaces.
+
+#### UDN Functional Implementation Details
+
+OVN offers the ability to create multiple virtual topologies. As with secondary networks in OVN-Kubernetes today,
+separate topologies are created whenever a new network is needed. The same methodology will be leveraged for this design.
+Whenever a new network of type layer 3 or layer 2 is requested, a new topology will be created for that network, to which
+pods may connect.
+
+The limitation today with secondary networks is that there is only support for east/west traffic. This RFE will address
+adding north/south support for user-defined primary and secondary networks. In order to support north/south
+traffic, pods on different networks need to be able to egress, typically using the host’s IP. Today in shared gateway
+mode we use a Gateway Router (GR) in order to provide this external connectivity, while in local gateway mode, the host
+kernel handles SNAT’ing and routing out egress traffic. Ingress traffic follows similar paths in reverse. There
+are some exceptions to these rules:
+
+1. MEG traffic always uses the GR to send and receive traffic.
+2. Egress IP on the primary NIC always uses the GR, even in local gateway mode.
+3. Egress Services always use the host kernel for egress routing.
+
+To provide an ingress/egress point for pods on different networks, the simplest solution may appear to be connecting
+them all to a single gateway router. This introduces an issue: all networks would be connected to a single router,
+and routing could happen between networks that were supposed to be isolated from one another. Furthermore, in
+the future we will want to extend these networks beyond the cluster, and doing that in OVN would require making a
+single router VRF aware, which adds more complexity to OVN.
+
+The proposal here is to create a GR per network. With this topology, OVN will create a patch port per network to the
+br-ex bridge. OVN-Kubernetes will be responsible for being VRF/network aware and forwarding packets via flows in br-ex
+to the right GR. Each per-network GR will only have load balancers configured on it for its network, and only be able to
+route to pods in its network. The logical topology would look something like this, if we use an example of having a
+cluster default primary network, a layer 3 primary network, and a layer 2 primary network:
+
+![VRF Topology](../images/VRFs.svg)
+
+In the above diagram, each network is assigned a unique conntrack zone and conntrack mark. These are required in order
+to be able to handle overlapping networks egressing into the same VRF and SNAT’ing to the host IP. Note, the default
+cluster network does not need to use a unique CT mark or zone, and will continue to work as it does today. This is due
+to the fact that no user-defined network may overlap with the default cluster subnet. More details in the next section.
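+
+Before moving on, here is a minimal sketch of the kind of per-network derivation implied by the diagram above, assuming
+each network is identified by a small integer ID: the masquerade IP, conntrack zone, and conntrack mark all follow from
+it. The bases and offsets are illustrative (chosen only to reproduce the 169.254.169.5 / zone 64005 / mark 5 example in
+the diagrams), not the allocator OVN-Kubernetes actually uses.
+
+```golang
+// per_network_ct_sketch.go
+//
+// Illustrative per-network masquerade IP / conntrack zone / conntrack mark
+// derivation from a network ID.
+package main
+
+import (
+    "fmt"
+    "net/netip"
+)
+
+// perNetworkConntrackIDs derives the per-network masquerade IP, CT zone and CT
+// mark from a small network ID. Only valid for small IDs in this sketch (the
+// last octet would wrap for large IDs); a real allocator must bound the range
+// by the configured masquerade subnet size.
+func perNetworkConntrackIDs(networkID int) (masqueradeIP netip.Addr, ctZone, ctMark uint32) {
+    base := netip.MustParseAddr("169.254.169.0").As4()
+    base[3] += byte(networkID)
+    return netip.AddrFrom4(base), 64000 + uint32(networkID), uint32(networkID)
+}
+
+func main() {
+    ip, zone, mark := perNetworkConntrackIDs(5)
+    fmt.Println(ip, zone, mark) // 169.254.169.5 64005 5
+}
+```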
+ +#### Shared Gateway Mode + +##### Pod Egress + +On pod egress, the respective GR of that network will handle doing the SNAT to a unique masquerade subnet IP assigned to +this network. For example, in the above diagram packets leaving GR-layer3 would be SNAT’ed to 169.254.169.5 in zone 64005. +The packet will then enter br-ex, where flows in br-ex will match this packet, and then SNAT the packet to the node IP +in zone 0, and apply its CT mark of 5. Finally, the packet will be recirculated back to table 0, where the packet will +be CT marked with 1 in zone 64000, and sent out of the physical interface. In OVN-Kubernetes we use zone 64000 to track +things from OVN or the host and additionally, we mark packets from OVN with a CT Mark of 1 and packets from the host +with 2. Pseudo openflow rules would look like this (assuming node IP of 172.18.0.3): + +``` +pod-->GR(snat, 169.254.169.5, zone 64005)->br-ex(snat, 172.18.0.3, zone 0, mark 5, table=0) -->recirc table0 (commit +zone 64000, mark 1) -->eth0 +``` + +The above design will accommodate for overlapping networks with overlapping ports. The worst case scenario is if two +networks share the same address space, and two pods with identical IPs are trying to connect externally using the same +source and destination port. Although unlikely, we have to plan for this type of scenario. When each pod tries to send a +packet through their respective GR, SNAT’ing to the unique GR masquerade IP differentiates the conntrack entries. Now, +when the final SNAT occurs in br-ex with zone 0, they can be determined as different connections via source IP, and when +SNAT’ing to host IP, conntrack will detect a collision using the same layer 4 port, and choose a different port to use. + +When reply traffic comes back into the cluster, we must now submit the packet to conntrack to find which network this +traffic belongs to. The packet is always first sent into zone 64000, where it is determined whether this packet +belonged to OVN (CT mark of 1) or the host. Once identified by CT mark as OVN traffic, the packet will then be unSNAT’ed +in zone 0 via br-ex rules and the CT mark restored of which network it belonged to. Finally, we can send the packet to +the correct GR via the right patch port, by matching on the restored CT Mark. From there, OVN will handle unSNAT’ing the +masquerade IP and forward the packet to the original pod. + +To support KubeVirt live migration the GR LRP will have an extra address with the configured gateway for the layer2 +subnet (to allow the gateway IP to be independent of the node where the VM is running on). After live migration succeeds, +OVN should send a GARP for VMs to clean up its ARP tables since the gateway IP has different mac now. + +The live migration feature at layer 2 described here will work only with OVN interconnect (OVN IC, which is used by OCP). +Since there is no MAC learning between zones, so we can have the same extra address on every gateway router port, basically +implementing anycast for this SVI address. + +Following is a picture that illustrate all these bits with a topology + +![Layer 2 Egress Topology](../images/multi-homing-l2-gw.svg) + +##### Services + +When ingress service traffic enters br-ex, there are flows installed that steer service traffic towards the OVN GR. With +additional networks, these flows will be modified to steer traffic to the correct GR-<network>’s patch port. 
+ +When a host process or host networked pod on a Kubernetes node initiates a connection to a service, iptables rules will +DNAT the nodeport or loadbalancer IP into the cluster IP, and then send the traffic via br-ex where it is masqueraded +and sent into the OVN GR. These flows can all be modified to detect the service IP and then send to the correct +GR-<network> patch port. For example, in the br-ex (breth0) bridge today we have flows that match on packets sent +to the service CIDR (10.96.0.0/24): + +``` +[root@ovn-worker ~]# ovs-ofctl dump-flows breth0 table=0 | grep 10.96 + cookie=0xdeff105, duration=22226.373s, table=0, n_packets=41, n_bytes=4598, idle_age=19399,priority=500,ip,in_port=LOCAL,nw_dst=10.96.0.0/16 actions=ct(commit,table=2,zone=64001,nat(src=169.254.169.2)) +``` + +Packets that are destined to the service CIDR are SNAT'ed to the masquerade IP of the host (169.254.169.2) and then +sent to the dispatch table 2: + +``` +[root@ovn-worker ~]# ovs-ofctl dump-flows breth0 table=2 + cookie=0xdeff105, duration=22266.310s, table=2, n_packets=41, n_bytes=4598, actions=mod_dl_dst:02:42:ac:12:00:03,output:"patch-breth0_ov" +``` + +In the above flow, all packets have the dest MAC address changed to be that of the OVN GR, and then sent on the patch port +towards the OVN GR. With multiple networks, host access to cluster IP service flows will now be modified to be on a per +cluster IP basis. For example, if we assume two services exist on two user defined namespaces with cluster IPs 10.96.0.5 +and 10.96.0.6. The flows would look like: + +``` +[root@ovn-worker ~]# ovs-ofctl dump-flows breth0 table=0 | grep 10.96 + cookie=0xdeff105, duration=22226.373s, table=0, n_packets=41, n_bytes=4598, idle_age=19399,priority=500,ip,in_port=LOCAL,nw_dst=10.96.0.5 actions=set_field:2->reg1,ct(commit,table=2,zone=64001,nat(src=169.254.169.2)) + cookie=0xdeff105, duration=22226.373s, table=0, n_packets=41, n_bytes=4598, idle_age=19399,priority=500,ip,in_port=LOCAL,nw_dst=10.96.0.6 actions=set_field:3->reg1,ct(commit,table=2,zone=64001,nat(src=169.254.169.2)) +``` + +The above flows are now per cluster IP and will send the packet to the dispatch table while also setting unique register +values to differentiate which OVN network these packets should be delivered to: + +``` +[root@ovn-worker ~]# ovs-ofctl dump-flows breth0 table=2 + cookie=0xdeff105, duration=22266.310s, table=2, n_packets=41, n_bytes=4598, reg1=0x2 actions=mod_dl_dst:02:42:ac:12:00:05,output:"patch-breth0-net1" + cookie=0xdeff105, duration=22266.310s, table=2, n_packets=41, n_bytes=4598, reg1=0x3 actions=mod_dl_dst:02:42:ac:12:00:06,output:"patch-breth0-net2" +``` + +Furthermore, host networked pod access to services will be restricted to the network it belongs to. For more information +see the [Host Networked Pods](#host-networked-pods) section. + +Additionally, in the case where there is hairpin service traffic to the host +(Host->Service->Endpoint is also the host), the endpoint reply traffic will need to be distinguishable on a per network +basis. In order to achieve this, each OVN GR’s unique masquerade IP will be leveraged. + +For service access towards KAPI/DNS or potentially other services on the cluster default network, there are two potential +technical solutions. Assume eth0 is the pod interface connected to the cluster default network, and eth1 is connected to the +user-defined primary network: + +1. Add routes for KAPI/DNS specifically into the pod to go out eth0, while all other service access will go to eth1. 
+ This will then just work normally with the load balancers on the switches for the respective networks. + +2. Do not send any service traffic out of eth0, instead all service traffic goes to eth1. In this case all service + traffic is flowing through the user-defined primary network, where only load balancers for that network are configured + on that network's OVN worker switch. Therefore, packets to KAPI/DNS (services not on this network) are not DNAT'ed at + the worker switch and are instead forwarded onwards to the ovn_cluster_router_<user-defined network> or + GR-<node-user-defined-network> for layer 3 or layer 2 networks, respectively . This router is + configured to send service CIDR traffic to ovn-k8s-mp0-<user-defined network>. IPTables rules in the host only permit + access to KAPI/DNS and drop all other service traffic coming from ovn-k8s-mp0-<user-defined network>. The traffic then + gets routed to br-ex and default GR where it hits the OVN load balancer there and forwarded to the right endpoint. + +While the second option is more complex, it allows for not configuring routes to service addresses in the pod that could +hypothetically change. + +##### Egress IP + +This feature works today by labeling and choosing a node+network to be used for egress, and then OVN logical routes and +logical route policies are created which steer traffic from a pod towards a specific gateway router (for primary network +egress). From there the packets are SNAT’ed by the OVN GR to the egress IP, and sent to br-ex. Egress IP is cluster +scoped, but applies to selected namespaces, which will allow us to only apply the SNAT and routes to the GR and OVN +topology elements of that network. In the layer 3 case, the current design used today for the cluster default primary +network will need some changes. Since Egress IP may be served on multiple namespaces and thus networks, it is possible +that there could be a collision as previously mentioned in the Pod Egress section. Therefore, the same solution provided +in that section where the GR SNATs to the masquerade subnet must be utilized. However, once the packet arrives in br-ex +we will need a way to tell if it was sent from a pod affected by a specific egress IP. To address this, pkt_mark will be +used to mark egress IP packets and signify to br-ex which egress IP to SNAT to. An example where the egress IP is +1.1.1.1 that maps to pkt_mark 10 would look something like this: + +![Egress IP VRF SGW](../images/egress-ip-vrf-sgw.svg) + +For layer 2, egress IP has never been supported before. With the IC design, there is no need to have an +ovn_cluster_router and join switch separating the layer 2 switch network (transit switch) from the GR. Non-IC will +not be supported. In the layer 2 IC model, GRs per node on a network will all be connected to the layer 2 transit switch: + +![Egress IP Layer 2](../images/egress-ip-l2-primary.svg) + +In the above diagram, Node 2 is chosen to be the egress IP node for any pods in namespace A. Pod 1 and Pod 2 have +default gateway routes to their respective GR on their node. When egress traffic leaves Pod 2, it is sent towards its +GR-A on node 2, where it is SNAT’ed to the egress IP and the traffic sent to br-ex. For Pod 1, its traffic is sent to +its GR-A on Node 1, where it is then rerouted towards GR-A on Node 2 for egress. + +##### Egress Firewall + +Egress firewall is enforced at the OVN logical switch, and this proposal has no effect on its functionality. 
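+
+Returning to the Egress IP handling above, the sketch below renders a pseudo OpenFlow rule in the spirit of the earlier
+pseudo flows: br-ex matches the pkt_mark assigned to an Egress IP and SNATs to that IP. The table, priority and exact
+match set are illustrative only, not the flows OVN-Kubernetes actually programs.
+
+```golang
+// egress_ip_flow_sketch.go
+//
+// Illustrative rendering of a br-ex pseudo flow: match an Egress IP pkt_mark
+// and SNAT to the corresponding egress IP.
+package main
+
+import "fmt"
+
+// egressIPSNATFlow builds a pseudo flow string; the real flows carry
+// additional matches, cookies, and commit the connection in the proper zone.
+func egressIPSNATFlow(pktMark uint32, egressIP, outPort string) string {
+    return fmt.Sprintf(
+        "table=0,priority=105,ip,pkt_mark=%d,actions=ct(commit,zone=0,nat(src=%s)),output:%s",
+        pktMark, egressIP, outPort)
+}
+
+func main() {
+    // Example from the text: egress IP 1.1.1.1 mapped to pkt_mark 10.
+    fmt.Println(egressIPSNATFlow(10, "1.1.1.1", "eth0"))
+}
+```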
+ +##### Egress QoS + +Egress QoS is namespace scoped and functions by marking packets at the OVN logical switch, and this proposal has no +effect on its functionality. + +##### Egress Service + +Egress service is namespace scoped and its primary function is to SNAT egress packets to a load balancer IP. As +previously mentioned, the feature works the same in shared and local gateway mode, by leveraging the local gateway mode +path. Therefore, its design will be covered in the Local Gateway Mode section of the Design Details. + +##### Multiple External Gateways (MEG) / Admin Based Policy Routing (ABPR) + +There will be no support for MEG or pod direct ingress on any network other than the primary, cluster default network. +This support may be enhanced later by extending VRFs/networks outside the cluster. + +#### Local Gateway Mode + +With local gateway mode, egress/ingress traffic uses the kernel’s networking stack as a next hop. OVN-Kubernetes +leverages an interface named ā€œovn-k8s-mp0ā€ in order to facilitate sending traffic to and receiving traffic from the +host. For egress traffic, the host routing table decides where to send the egress packet, and then the source IP is +masqueraded to the node IP of the egress interface. For ingress traffic, the host routing table steers packets destined +for pods via ovn-k8s-mp0 and SNAT’s the packet to the interface address. + +For multiple networks to use local gateway mode, some changes are necessary. The ovn-k8s-mp0 port is a logical port in +the OVN topology tied to the cluster default network. There will need to be multiple ovn-k8s-mp0 ports created, one per +network. Additionally, all of these ports cannot reside in the default VRF of the host network. Doing so would result in +an inability to have overlapping subnets, as well as the host VRF would be capable of routing packets between namespace +networks, which is undesirable. Therefore, each ovn-k8s-mp0-<network> interface must be placed in its own VRF: + +![Local GW Node Setup](../images/local-gw-node-setup-vrfs.svg) + +The VRFs will clone the default routing table, excluding routes that are created by OVN-Kubernetes for its networks. +This is similar to the methodology in place today for supporting +[Egress IP with multiple NICs](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/cluster-egress-controls/egress-ip.md#egressip-ip-is-assigned-to-a-secondary-host-interface). + +##### Pod Egress + +Similar to the predicament outlined in Shared Gateway mode, we need to solve the improbable case where two networks have +the same address space, and pods with the same IP/ports are trying to talk externally to the same server. In this case, +OVN-Kubernetes will reserve an extra IP from the masquerade subnet per network. This masquerade IP will be used to SNAT +egress packets from pods leaving via mp0. The SNAT will be performed by ovn_cluster_router for layer 3 networks and +the gateway router (GR) for layer 2 networks using configuration like: + +``` +[root@ovn-worker ~]# ovn-nbctl lr-nat-list daac7843-ad73-4b73-b415-e432a28f0d61 +TYPE GATEWAY_PORT MATCH EXTERNAL_IP EXTERNAL_PORT LOGICAL_IP EXTERNAL_MAC LOGICAL_PORT +snat eth.dst == 0a:58:0 169.254.0.100 10.20.1.0/24 +``` + +Now when egress traffic arrives in the host via mp0, it will enter the VRF, where clone routes will route the packet as +if it was in the default VRF out a physical interface, typically towards br-ex, and the packet is SNAT’ed to the host IP. 
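+
+As a rough illustration of the VRF route cloning mentioned above, the sketch below copies IPv4 routes from the main
+table into a per-network VRF table while skipping OVN-Kubernetes managed subnets, using the vishvananda/netlink package.
+The table number and excluded CIDR are made up for the example; error handling, IPv6, and route ownership tracking are
+omitted, and the program needs Linux plus root privileges to actually modify routes.
+
+```golang
+// vrf_route_clone_sketch.go
+//
+// Rough sketch: clone main-table IPv4 routes into a per-network VRF table,
+// excluding OVN-Kubernetes managed subnets.
+package main
+
+import (
+    "net"
+
+    "github.com/vishvananda/netlink"
+    "golang.org/x/sys/unix"
+)
+
+// cloneMainTableIntoVRF copies IPv4 routes from the main table into vrfTable,
+// skipping any route whose destination falls inside an excluded CIDR.
+func cloneMainTableIntoVRF(vrfTable int, exclude []*net.IPNet) error {
+    filter := &netlink.Route{Table: unix.RT_TABLE_MAIN}
+    routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_TABLE)
+    if err != nil {
+        return err
+    }
+    for _, r := range routes {
+        if r.Dst != nil && isExcluded(r.Dst, exclude) {
+            continue
+        }
+        clone := r
+        clone.Table = vrfTable
+        if err := netlink.RouteReplace(&clone); err != nil {
+            return err
+        }
+    }
+    return nil
+}
+
+func isExcluded(dst *net.IPNet, exclude []*net.IPNet) bool {
+    for _, e := range exclude {
+        if e.Contains(dst.IP) {
+            return true
+        }
+    }
+    return false
+}
+
+func main() {
+    _, podCIDR, _ := net.ParseCIDR("10.244.0.0/16")
+    // Table 1007 is a made-up per-network VRF table number for illustration.
+    _ = cloneMainTableIntoVRF(1007, []*net.IPNet{podCIDR})
+}
+```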
+ +When the egress reply comes back into the host, iptables will unSNAT the packet and the destination will be +169.254.169.100. At this point, an ip rule will match the destination on the packet and do a lookup in the VRF where a +route specifying 169.254.169.100/32 via 10.244.0.1 will cause the packet to be sent back out the right mp0 port for the +respective network. + +Note, the extra masquerade SNAT will not be required on the cluster default network's ovn-k8s-mp0 port. This will +preserve the previous behavior, and it is not necessary to introduce this SNAT since the default cluster network subnet +may not overlap with user-defined networks. + +##### Services + +Local gateway mode services function similar to the behavior described in host -> service description in the Shared +Gateway Mode Services section. When the packet enters br-ex, it is forwarded to the host, where it is then DNAT’ed to +the cluster IP and typically sent back into br-ex towards the OVN GR. This traffic will behave the same as previously +described. There are some exceptions to this case, namely when external traffic policy (ETP) is set to local. In this +case traffic is DNAT’ed to a special masquerade IP (169.254.169.3) and sent via ovn-k8s-mp0. There will need to be IP +rules to match on the destination node port and steer traffic to the right VRF for this case. Additionally, with internal +traffic policy (ITP) is set to local, packets are marked in the mangle table and forwarded via ovn-k8s-mp0 with an IP +rule and routing table 7. This logic will need to ensure the right ovn-k8s-mp0 is chosen for this case as well. + +##### Egress IP + +As previously mentioned, egress IP on the primary NIC follows the pathway of shared gateway mode. The traffic is not +routed by the kernel networking stack as a next hop. However, for multi-nic support, packets are sent into the kernel +via the ovn-k8s-mp0 port. Here the packets are matched on, sent to an egress IP VRF, SNAT’ed and sent out the chosen +interface. The detailed steps for a pod with IP address 10.244.2.3 affected by egress IP look like: + +1. Pod sends egress packet, arrives in the kernel via ovn-k8s-mp0 port, the packet is marked with 1008 (0x3f0 in hex) + if it should skip egress IP. It has no mark if the packet should be affected by egress IP. +2. IP rules match the source IP of the packet, and send it into an egress IP VRF (rule 6000): + + ``` + sh-5.2# ip rule + + 0: from all lookup local + 30: from all fwmark 0x1745ec lookup 7 + 5999: from all fwmark 0x3f0 lookup main + 6000: from 10.244.2.3 lookup 1111 + 32766: from all lookup main + 32767: from all lookup default + ``` + +3. Iptables rules save the packet mark in conntrack. This is only applicable to packets that were marked with 1008 and + are bypassing egress IP: + + ``` + sh-5.2# iptables -t mangle -L PREROUTING + + Chain PREROUTING (policy ACCEPT) + target prot opt source destination + CONNMARK all -- anywhere anywhere mark match 0x3f0 CONNMARK save + CONNMARK all -- anywhere anywhere mark match 0x0 CONNMARK restore + ``` + +4. VRF 1111 has a route in it to steer the packet to the right egress interface: + + ``` + sh-5.2# ip route show table 1111 + default dev eth1 + ``` + +5. IPTables rules in NAT table SNAT the packet: + + ``` + -A OVN-KUBE-EGRESS-IP-MULTI-NIC -s 10.244.2.3/32 -o eth1 -j SNAT --to-source 10.10.10.100 + ``` + +6. For reply bypass traffic, the 0x3f0 mark is restored, and ip rules 5999 send it back into default VRF for routing + back into mp0 for non-egress IP packets. 
This is rule and connmark restoring is required for the packet to pass the + reverse path filter (RPF) check. For egress IP reply packets, there is no connmark restored and the packets hit the + default routing table to go back into mp0. + +This functionality will continue to work, with ip rules steering the packets from the per network VRF to the appropriate +egress IP VRF. CONNMARK will continue to be used so that return traffic is sent back to the correct VRF. Step 5 in the +above may need to be tweaked to match on mark in case 2 pods have overlapping IPs, and are both egressing +the same interface with different Egress IPs. The flow would look something like this: + +![Egress IP VRF LGW](../images/egress-ip-vrf-lgw.svg) + +##### Egress Firewall + +Egress firewall is enforced at the OVN logical switch, and this proposal has no effect on its functionality. + + +##### Egress QoS + +Egress QoS is namespace scoped and functions by marking packets at the OVN logical switch, and this proposal has no +effect on its functionality. + +##### Egress Service + +Egress service functions similar to Egress IP in local gateway mode, with the exception that all traffic paths go +through the kernel networking stack. Egress Service also uses IP rules and VRFs in order to match on traffic and forward +it out the right network (if specified in the CRD). It uses iptables in order to SNAT packets to the load balancer IP. +Like Egress IP, with user-defined networks there will need to be IP rules with higher precedence to match on packets from +specific networks and direct them to the right VRF. + +##### Multiple External Gateways (MEG) + +There will be no support for MEG or pod direct ingress on any network other than the primary, cluster default network. +Remember, MEG works the same way in local or shared gateway mode, by utilizing the shared gateway path. This support may +be enhanced later by extending VRFs/networks outside the cluster. + +#### Kubernetes Readiness/Liveness Probes + +As previously mentioned, Kubelet probes will continue to work. This includes all types of probes such as TCP, HTTP or +GRPC. Additionally, we want to restrict host networked pods in namespaces that belong to user-defined networks from +being able to access pods in other networks. For that reason, we need to block host networked pods from being able to +access pods via the cluster default network. In order to do this, but still allow Kubelet to send probes; the cgroup +module in iptables will be leveraged. For example: + +``` +root@ovn-worker:/# iptables -L -t raw -v +Chain PREROUTING (policy ACCEPT 6587 packets, 1438K bytes) + pkts bytes target prot opt in out source destination + +Chain OUTPUT (policy ACCEPT 3003 packets, 940K bytes) + pkts bytes target prot opt in out source destination + 3677 1029K ACCEPT all -- any any anywhere anywhere cgroup kubelet.slice/kubelet.service + 0 0 ACCEPT all -- any any anywhere anywhere ctstate ESTABLISHED + 564 33840 DROP all -- any any anywhere 10.244.0.0/16 +``` + +From the output we can see that traffic to the pod network ```10.244.0.0/16``` will be dropped by default. However, +traffic coming from kubelet will be allowed. + +#### Host Networked Pods + +##### VRF Considerations + +By encompassing VRFs into the host, this introduces some constraints and requirements for the behavior of host networked +type pods. 
If a host networked pod is created in a Kubernetes namespace that has a user-defined network, it should be +confined to only talking to ovn-networked pods on that same user-defined network. + +With Linux VRFs, different socket types behave differently by default. Raw, unbound sockets by default are allowed to +listen and span multiple VRFs, while TCP, UDP, SCTP and other protocols are restricted to the default VRF. There are +settings to control this behavior via sysctl, with the defaults looking like this: + +``` +trozet@fedora:~/Downloads/ip-10-0-169-248.us-east-2.compute.internal$ sudo sysctl -A | grep net | grep l3mdev +net.ipv4.raw_l3mdev_accept = 1 +net.ipv4.tcp_l3mdev_accept = 0 +net.ipv4.udp_l3mdev_accept = 0 +``` + +Note, there is no current [support in the kernel for SCTP](https://lore.kernel.org/netdev/bf6bcf15c5b1f921758bc92cae2660f68ed6848b.1668357542.git.lucien.xin@gmail.com/), +and it does not look like there is support for IPv6. Given the desired behavior to restrict host networked pods to +talking to only pods in their namespace/network, it may make sense to set raw_l3dev_accept to 0. This is set to 1 by +default to allow legacy ping applications to work over VRFs. Furthermore, a user modifying sysctl settings to allow +applications to listen across all VRFs will be unsupported. Reasons include there can be odd behavior and interactions +that occur with applications communicating across multiple VRFs, as well as the fact that this would break the native +network isolation paradigm offered by this feature. + +For host network pods to be able to communicate with pod IPs on their user-defined network, the only supported method +will be for the applications to bind their socket to the VRF device. Many applications will not be able to support this, +so in the future it makes sense to come up with a better solution. One possibility is to use ebpf in order to intercept +the socket bind call of an application (that typically will bind to INADDR_ANY) and force it to bind to the VRF device. +Note, host network pods will still be able to communicate with pods via services that belong to its user-defined network +without any limitations. See the next section [Service Access](#service-access) for more information. + +Keep in mind that if a host network pod runs and does not bind to the VRF device, it will be able to communicate on the +default VRF. This means the host networked pod will be able to talk to other host network pods. However, due to nftables +rules in the host however, it will not be able to talk to OVN networked pods via the default cluster network/VRF. + +For more information on how VRFs function in Linux and the settings discussed in this section, refer to +[https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/Documentation/networking/vrf.rst?h=v6.1](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/Documentation/networking/vrf.rst?h=v6.1) for +more details. + +##### Service Access + +Host networked pods in a user-defined network will be restricted to only accessing services in either: +1. The cluster default network. +2. The user-defined network in which the host networked pod's namespace belongs to. + +This will be enforced by iptables/nftables rules added that match on the cgroup of the host networked pod. 
For example: + +``` +root@ovn-worker:/# iptables -L -t raw -v +Chain PREROUTING (policy ACCEPT 60862 packets, 385M bytes) + pkts bytes target prot opt in out source destination + +Chain OUTPUT (policy ACCEPT 36855 packets, 2504K bytes) + pkts bytes target prot opt in out source destination + 17 1800 ACCEPT all -- any any anywhere 10.96.0.1 cgroup /kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod992d3b9e_3f85_42e2_9558_9d4273d4236f.slice +23840 6376K ACCEPT all -- any any anywhere anywhere cgroup kubelet.slice/kubelet.service + 0 0 ACCEPT all -- any any anywhere anywhere ctstate ESTABLISHED + 638 37720 DROP all -- any any anywhere 10.244.0.0/16 + 28 1440 DROP all -- any any anywhere 10.96.0.0/16 +``` +In the example above, access to the service network of ```10.96.0.0/16``` is denied by default. However, one host networked +pod is given access to the 10.96.0.1 cluster IP service, while other host networked pods are blocked from access. + +### Testing Details + +* E2E upstream CI jobs covering supported features across multiple networks. +* E2E tests which ensure network isolation between OVN networked and host networked pods, services, etc. +* E2E tests covering network subnet overlap and reachability to external networks. +* Scale testing to determine limits and impact of multiple user-defined networks. This is not only limited to OVN, but + also includes OVN-Kubernetes’ design where we spawn a new network controller for every new network created. +* Integration testing with other features like IPSec to ensure compatibility. +* E2E tests verify the expected NAD is generated according to CRDs spec. +* E2E tests verify workloads on different namespaces connected to namespace scoped OVN network with the same name cannot communicate. +* E2E tests verify workloads on different namespaces connected to cluster-scope OVN network can communicate. + +### Documentation Details + +* ovn-kubernetes.io will be updated with a UDN user guide, and a support matrix showing what features are supported + with UDN. +* Additional dev guides will be added to the repo to show how the internal design of UDN is implemented. + +## Risks, Known Limitations and Mitigations + +### Risks and Mitigations + +The biggest risk with this feature is hitting scale limitations. With many namespaces and networks, the number of +internal OVN objects will multiply, as well as internal kernel devices, rules, VRFs. There will need to be a large-scale +effort to determine how many networks we can comfortably support. + +Following the introduction of a CRD for non-admin users create OVN network, there is a risk a non-admin users could +cause node / OVN resources starvation due to creating too many OVN networks. +To mitigates it, CRDs controller could monitor how many OVN network exists and reject new ones in case a given limit is exceeded. + +Alternatively, OVN-K resources should be exposed as node resource (using the device-plugin API). +Once a node resource is exposed, it will enable using the [resource-quota API](https://kubernetes.io/docs/concepts/policy/resource-quotas/#resource-quota-for-extended-resources) +and put boundaries on how many networks could exist. + +There is also a risk of breaking secondary projects that integrate with OVN-Kubernetes, such as Metal LB or Submariner. + +### Drawbacks + +As described in the Design Details section, this proposal will require reserving two IPs per network in the masquerade +subnet. 
This is a private subnet only used internally by OVN-Kubernetes, but it will require increasing the subnet size +in order to accommodate multiple networks. Today this subnet by default is configured as a /29 for IPv4, and only 6 IP +addresses are used. With this new design, users will need to reconfigure their subnet to be large enough to hold the +desired number of networks. Note, API changes will need to be made in order to support changing the masquerade subnet +post-installation. + +## OVN Kubernetes Version Skew + +UDN will be delivered in version 1.1.0. + +## Alternatives + +None + +## References + +None \ No newline at end of file diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index 381d8d647c..2dc1189c62 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -546,11 +546,15 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util // register ovnkube node specific prometheus metrics exported by the node metrics.RegisterNodeMetrics(ctx.Done()) - ovsClient, err = libovsdb.NewOVSClient(ctx.Done()) - if err != nil { - nodeErr = fmt.Errorf("failed to initialize libovsdb vswitchd client: %w", err) - return + // OVS is not running on dpu-host nodes + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + ovsClient, err = libovsdb.NewOVSClient(ctx.Done()) + if err != nil { + nodeErr = fmt.Errorf("failed to initialize libovsdb vswitchd client: %w", err) + return + } } + nodeControllerManager, err := controllermanager.NewNodeControllerManager( ovnClientset, watchFactory, diff --git a/go-controller/hack/update-codegen.sh b/go-controller/hack/update-codegen.sh index ee86a1c3ec..66e26375a8 100755 --- a/go-controller/hack/update-codegen.sh +++ b/go-controller/hack/update-codegen.sh @@ -27,6 +27,18 @@ if [[ "${builddir}" == /tmp/* ]]; then #paranoia rm -rf "${builddir}" fi +# Helper function to get API version for a given CRD +get_crd_version() { + case "$1" in + networkqos) + echo "v1alpha1" + ;; + *) + echo "v1" + ;; + esac +} + # deepcopy for types deepcopy-gen \ --go-header-file hack/boilerplate.go.txt \ @@ -39,52 +51,54 @@ for crd in ${crds}; do # for types we already generated deepcopy above which is all we need [ "$crd" = "types" ] && continue - echo "Generating deepcopy funcs for $crd" + api_version=$(get_crd_version "${crd}") + + echo "Generating deepcopy funcs for $crd ($api_version)" deepcopy-gen \ --go-header-file hack/boilerplate.go.txt \ --output-file zz_generated.deepcopy.go \ --bounding-dirs github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd \ - github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1 \ + github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version} \ "$@" - echo "Generating apply configuration for $crd" + echo "Generating apply configuration for $crd ($api_version)" applyconfiguration-gen \ --go-header-file hack/boilerplate.go.txt \ - --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/v1/apis/applyconfiguration \ - --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/applyconfiguration \ - github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1 \ + --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/${api_version}/apis/applyconfiguration \ + --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/applyconfiguration \ + github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version} \ "$@" - echo "Generating clientset for $crd" + echo "Generating clientset for 
$crd ($api_version)" client-gen \ --go-header-file hack/boilerplate.go.txt \ --clientset-name "${CLIENTSET_NAME_VERSIONED:-versioned}" \ --input-base "" \ - --input github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1 \ - --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/v1/apis/clientset \ - --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/clientset \ - --apply-configuration-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/applyconfiguration \ - --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements" \ + --input github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version} \ + --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/${api_version}/apis/clientset \ + --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/clientset \ + --apply-configuration-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/applyconfiguration \ + --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements,NetworkQoS:NetworkQoSes" \ "$@" - echo "Generating listers for $crd" + echo "Generating listers for $crd ($api_version)" lister-gen \ --go-header-file hack/boilerplate.go.txt \ - --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/v1/apis/listers \ - --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/listers \ - --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements" \ - github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1 \ + --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/${api_version}/apis/listers \ + --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/listers \ + --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements,NetworkQoS:NetworkQoSes" \ + github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version} \ "$@" - echo "Generating informers for $crd" + echo "Generating informers for $crd ($api_version)" informer-gen \ --go-header-file hack/boilerplate.go.txt \ - --versioned-clientset-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/clientset/versioned \ - --listers-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/listers \ - --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/v1/apis/informers \ - --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1/apis/informers \ - --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements" \ - github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/v1 \ + --versioned-clientset-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/clientset/versioned \ + --listers-package github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/listers \ + --output-dir "${SCRIPT_ROOT}"/pkg/crd/$crd/${api_version}/apis/informers \ + --output-pkg github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version}/apis/informers \ + --plural-exceptions="EgressQoS:EgressQoSes,RouteAdvertisements:RouteAdvertisements,NetworkQoS:NetworkQoSes" \ + github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/$crd/${api_version} \ "$@" done @@ -115,6 +129,8 @@ echo "Copying adminpolicybasedexternalroutes CRD" cp _output/crds/k8s.ovn.org_adminpolicybasedexternalroutes.yaml ../dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 echo "Copying egressService CRD" cp 
_output/crds/k8s.ovn.org_egressservices.yaml ../dist/templates/k8s.ovn.org_egressservices.yaml.j2 +echo "Copying networkQoS CRD" +cp _output/crds/k8s.ovn.org_networkqoses.yaml ../dist/templates/k8s.ovn.org_networkqoses.yaml.j2 echo "Copying userdefinednetworks CRD" cp _output/crds/k8s.ovn.org_userdefinednetworks.yaml ../dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 echo "Copying clusteruserdefinednetworks CRD" diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller.go b/go-controller/pkg/clustermanager/routeadvertisements/controller.go index 4eefde5282..04daa6cde1 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller.go @@ -116,7 +116,7 @@ func NewController( ) } - return c.updateRAStatus(ra, false, err) + return c.updateRAStatus(ra, false, errorstatus) } raConfig := &controllerutil.ControllerConfig[ratypes.RouteAdvertisements]{ @@ -374,7 +374,11 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) } if config.Gateway.Mode == config.GatewayModeLocal && network.TopologyType() == types.Layer2Topology { - return nil, nil, fmt.Errorf("%w: BGP is currenty not supported for Layer2 networks in local gateway mode, network: %s", errConfig, network.GetNetworkName()) + return nil, nil, fmt.Errorf("%w: BGP is currently not supported for Layer2 networks in local gateway mode, network: %s", errConfig, network.GetNetworkName()) + } + + if advertisements.Has(ratypes.EgressIP) && network.TopologyType() == types.Layer2Topology { + return nil, nil, fmt.Errorf("%w: EgressIP advertisement is currently not supported for Layer2 networks, network: %s", errConfig, network.GetNetworkName()) } vrf := util.GetNetworkVRFName(network) diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index c63f8fdc80..03e9391888 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -558,7 +558,7 @@ func TestController_reconcile(t *testing.T) { expectNADAnnotations: map[string]map[string]string{"blue": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, }, { - name: "(layer2) reconciles eip RouteAdvertisement for a single FRR config, node, non default networks and non default target VRF", + name: "(layer2) fails to reconcile eip RouteAdvertisement for a single FRR config, node, non default networks and non default target VRF", ra: &testRA{Name: "ra", TargetVRF: "green", AdvertiseEgressIPs: true, NetworkSelector: map[string]string{"selected": "true"}}, frrConfigs: []*testFRRConfig{ { @@ -589,20 +589,11 @@ func TestController_reconcile(t *testing.T) { {Name: "eip2", EIPs: map[string]string{"node": "1.0.1.4"}, NamespaceSelector: map[string]string{"selected": "black"}}, // namespace served by unselected network, ignored {Name: "eip3", EIPs: map[string]string{"node": "1.0.1.5"}, NamespaceSelector: map[string]string{"selected": "green"}}, }, - reconcile: "ra", - expectAcceptedStatus: metav1.ConditionTrue, - expectFRRConfigs: []*testFRRConfig{ - { - Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, - Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, - NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, - Routers: []*testRouter{ - {ASN: 1, VRF: "green", Prefixes: []string{"1.0.1.5/32", "172.100.0.17/32"}, 
Neighbors: []*testNeighbor{ - {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.0.1.5/32", "172.100.0.17/32"}}, - }}, - }}, - }, - expectNADAnnotations: map[string]map[string]string{"green": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + reconcile: "ra", + // EgressIP advertisements for Layer2 UDNs is not supported yet. + expectAcceptedStatus: metav1.ConditionFalse, + expectFRRConfigs: []*testFRRConfig{}, + expectNADAnnotations: map[string]map[string]string{"green": {}}, }, { name: "reconciles a RouteAdvertisement updating the generated FRRConfigurations if needed", diff --git a/go-controller/pkg/clustermanager/status_manager/networkqos_manager.go b/go-controller/pkg/clustermanager/status_manager/networkqos_manager.go new file mode 100644 index 0000000000..5be1390505 --- /dev/null +++ b/go-controller/pkg/clustermanager/status_manager/networkqos_manager.go @@ -0,0 +1,83 @@ +package status_manager + +import ( + "context" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosapply "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1" + networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + networkqoslisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +type networkQoSManager struct { + lister networkqoslisters.NetworkQoSLister + client networkqosclientset.Interface +} + +func newNetworkQoSManager(lister networkqoslisters.NetworkQoSLister, client networkqosclientset.Interface) *networkQoSManager { + return &networkQoSManager{ + lister: lister, + client: client, + } +} + +//lint:ignore U1000 generic interfaces throw false-positives https://github.com/dominikh/go-tools/issues/1440 +func (m *networkQoSManager) get(namespace, name string) (*networkqosapi.NetworkQoS, error) { + return m.lister.NetworkQoSes(namespace).Get(name) +} + +//lint:ignore U1000 generic interfaces throw false-positives +func (m *networkQoSManager) getMessages(networkQoS *networkqosapi.NetworkQoS) []string { + var messages []string + for _, condition := range networkQoS.Status.Conditions { + messages = append(messages, condition.Message) + } + return messages +} + +//lint:ignore U1000 generic interfaces throw false-positives +func (m *networkQoSManager) updateStatus(networkQoS *networkqosapi.NetworkQoS, applyOpts *metav1.ApplyOptions, + applyEmptyOrFailed bool) error { + if networkQoS == nil { + return nil + } + newStatus := "NetworkQoS Destinations applied" + for _, condition := range networkQoS.Status.Conditions { + if strings.Contains(condition.Message, types.NetworkQoSErrorMsg) { + newStatus = types.NetworkQoSErrorMsg + break + } + } + if applyEmptyOrFailed && newStatus != types.NetworkQoSErrorMsg { + newStatus = "" + } + + if networkQoS.Status.Status == newStatus { + // already set to the same value + return nil + } + + applyStatus := networkqosapply.Status() + if newStatus != "" { + applyStatus.WithStatus(newStatus) + } + + applyObj := networkqosapply.NetworkQoS(networkQoS.Name, networkQoS.Namespace). 
+ WithStatus(applyStatus) + + _, err := m.client.K8sV1alpha1().NetworkQoSes(networkQoS.Namespace).ApplyStatus(context.TODO(), applyObj, *applyOpts) + return err +} + +//lint:ignore U1000 generic interfaces throw false-positives +func (m *networkQoSManager) cleanupStatus(networkQoS *networkqosapi.NetworkQoS, applyOpts *metav1.ApplyOptions) error { + applyObj := networkqosapply.NetworkQoS(networkQoS.Name, networkQoS.Namespace). + WithStatus(networkqosapply.Status()) + + _, err := m.client.K8sV1alpha1().NetworkQoSes(networkQoS.Namespace).ApplyStatus(context.TODO(), applyObj, *applyOpts) + return err +} diff --git a/go-controller/pkg/clustermanager/status_manager/status_manager.go b/go-controller/pkg/clustermanager/status_manager/status_manager.go index 224dc566ff..e770b054ff 100644 --- a/go-controller/pkg/clustermanager/status_manager/status_manager.go +++ b/go-controller/pkg/clustermanager/status_manager/status_manager.go @@ -20,6 +20,7 @@ import ( adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" egressfirewallapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -203,6 +204,16 @@ func NewStatusManager(wf *factory.WatchFactory, ovnClient *util.OVNClusterManage ) sm.typedManagers["egressqoses"] = egressQoSManager } + if config.OVNKubernetesFeature.EnableNetworkQoS { + networkQoSManager := newStatusManager[networkqosapi.NetworkQoS]( + "networkqoses_statusmanager", + wf.NetworkQoSInformer().Informer(), + wf.NetworkQoSInformer().Lister().List, + newNetworkQoSManager(wf.NetworkQoSInformer().Lister(), ovnClient.NetworkQoSClient), + sm.withZonesRLock, + ) + sm.typedManagers["networkqoses"] = networkQoSManager + } return sm } diff --git a/go-controller/pkg/clustermanager/status_manager/status_manager_test.go b/go-controller/pkg/clustermanager/status_manager/status_manager_test.go index 56fea90a79..6621ac20f4 100644 --- a/go-controller/pkg/clustermanager/status_manager/status_manager_test.go +++ b/go-controller/pkg/clustermanager/status_manager/status_manager_test.go @@ -7,6 +7,7 @@ import ( "sync/atomic" corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" @@ -21,6 +22,8 @@ import ( adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" egressfirewallapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -203,6 +206,77 @@ func checkEmptyEQStatusConsistently(egressQoS *egressqosapi.EgressQoS, fakeClien }).Should(BeTrue(), "expected Status to be consistently empty") } +func newNetworkQoS(namespace string) 
*networkqosapi.NetworkQoS { + return &networkqosapi.NetworkQoS{ + ObjectMeta: util.NewObjectMeta("default", namespace), + Spec: networkqosapi.Spec{ + NetworkSelectors: []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.NetworkAttachmentDefinitions, + NetworkAttachmentDefinitionSelector: &crdtypes.NetworkAttachmentDefinitionSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "stream", + }, + }, + }, + }, + }, + Priority: 100, + Egress: []networkqosapi.Rule{ + { + DSCP: 60, + Classifier: networkqosapi.Classifier{ + To: []networkqosapi.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "1.2.3.4/32", + }, + }, + }, + }, + Bandwidth: networkqosapi.Bandwidth{ + Rate: 100, + Burst: 1000, + }, + }, + }, + }, + } +} + +func updateNetworkQoSStatus(networkQoS *networkqosapi.NetworkQoS, status *networkqosapi.Status, + fakeClient *util.OVNClusterManagerClientset) { + networkQoS.Status = *status + _, err := fakeClient.NetworkQoSClient.K8sV1alpha1().NetworkQoSes(networkQoS.Namespace). + Update(context.TODO(), networkQoS, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) +} + +func checkNQStatusEventually(networkQoS *networkqosapi.NetworkQoS, expectFailure bool, expectEmpty bool, fakeClient *util.OVNClusterManagerClientset) { + Eventually(func() bool { + eq, err := fakeClient.NetworkQoSClient.K8sV1alpha1().NetworkQoSes(networkQoS.Namespace). + Get(context.TODO(), networkQoS.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + if expectFailure { + return strings.Contains(eq.Status.Status, types.NetworkQoSErrorMsg) + } else if expectEmpty { + return eq.Status.Status == "" + } else { + return strings.Contains(eq.Status.Status, "applied") + } + }).Should(BeTrue(), fmt.Sprintf("expected network QoS status with expectFailure=%v expectEmpty=%v", expectFailure, expectEmpty)) +} + +func checkEmptyNQStatusConsistently(networkQoS *networkqosapi.NetworkQoS, fakeClient *util.OVNClusterManagerClientset) { + Consistently(func() bool { + ef, err := fakeClient.NetworkQoSClient.K8sV1alpha1().NetworkQoSes(networkQoS.Namespace). 
+ Get(context.TODO(), networkQoS.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return ef.Status.Status == "" + }).Should(BeTrue(), "expected Status to be consistently empty") +} + var _ = Describe("Cluster Manager Status Manager", func() { var ( statusManager *StatusManager @@ -505,4 +579,96 @@ var _ = Describe("Cluster Manager Status Manager", func() { return atomic.LoadUint32(&banpWerePatched) }).Should(Equal(uint32(2))) }) + + It("updates NetworkQoS status with 1 zone", func() { + config.OVNKubernetesFeature.EnableNetworkQoS = true + zones := sets.New[string]("zone1") + namespace1 := util.NewNamespace(namespace1Name) + networkQoS := newNetworkQoS(namespace1.Name) + start(zones, namespace1, networkQoS) + updateNetworkQoSStatus(networkQoS, &networkqosapi.Status{ + Conditions: []metav1.Condition{{ + Type: "Ready-In-Zone-zone1", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }}, + }, fakeClient) + + checkNQStatusEventually(networkQoS, false, false, fakeClient) + }) + + It("updates NetworkQoS status with 2 zones", func() { + config.OVNKubernetesFeature.EnableNetworkQoS = true + zones := sets.New[string]("zone1", "zone2") + namespace1 := util.NewNamespace(namespace1Name) + networkQoS := newNetworkQoS(namespace1.Name) + start(zones, namespace1, networkQoS) + + updateNetworkQoSStatus(networkQoS, &networkqosapi.Status{ + Conditions: []metav1.Condition{{ + Type: "Ready-In-Zone-zone1", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }}, + }, fakeClient) + + checkEmptyNQStatusConsistently(networkQoS, fakeClient) + + updateNetworkQoSStatus(networkQoS, &networkqosapi.Status{ + Conditions: []metav1.Condition{{ + Type: "Ready-In-Zone-zone1", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }, { + Type: "Ready-In-Zone-zone2", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }}, + }, fakeClient) + checkNQStatusEventually(networkQoS, false, false, fakeClient) + + }) + + It("updates NetworkQoS status with UnknownZone", func() { + config.OVNKubernetesFeature.EnableNetworkQoS = true + zones := sets.New[string]("zone1", zone_tracker.UnknownZone) + namespace1 := util.NewNamespace(namespace1Name) + networkQoS := newNetworkQoS(namespace1.Name) + start(zones, namespace1, networkQoS) + + // no matter how many messages are in the status, it won't be updated while UnknownZone is present + updateNetworkQoSStatus(networkQoS, &networkqosapi.Status{ + Conditions: []metav1.Condition{{ + Type: "Ready-In-Zone-zone1", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }}, + }, fakeClient) + checkEmptyNQStatusConsistently(networkQoS, fakeClient) + + // when UnknownZone is removed, updates will be handled, but status from the new zone is not reported yet + statusManager.onZoneUpdate(sets.New[string]("zone1", "zone2")) + checkEmptyNQStatusConsistently(networkQoS, fakeClient) + // when new zone status is reported, status will be set + updateNetworkQoSStatus(networkQoS, &networkqosapi.Status{ + Conditions: []metav1.Condition{{ + Type: "Ready-In-Zone-zone1", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }, { + Type: "Ready-In-Zone-zone2", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "NetworkQoS Destinations applied", + }}, + }, 
fakeClient) + checkNQStatusEventually(networkQoS, false, false, fakeClient) + }) + }) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index ba3bdaee43..e2cc865265 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -182,8 +182,12 @@ func (pr *PodRequest) cmdAddWithGetCNIResultFunc( if pr.CNIConf.PhysicalNetworkName != "" { netName = pr.CNIConf.PhysicalNetworkName } - if err := checkBridgeMapping(ovsClient, pr.CNIConf.Topology, netName); err != nil { - return nil, fmt.Errorf("failed bridge mapping validation: %w", err) + + // Skip checking bridge mapping on DPU hosts as OVS is not present + if config.OvnKubeNode.Mode != types.NodeModeDPUHost { + if err := checkBridgeMapping(ovsClient, pr.CNIConf.Topology, netName); err != nil { + return nil, fmt.Errorf("failed bridge mapping validation: %w", err) + } } response.Result, err = getCNIResultFn(pr, clientset, podInterfaceInfo) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index f2bc55925e..c7df666cbc 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -432,6 +432,7 @@ type OVNKubernetesFeatureConfig struct { EnableDNSNameResolver bool `gcfg:"enable-dns-name-resolver"` EnableServiceTemplateSupport bool `gcfg:"enable-svc-template-support"` EnableObservability bool `gcfg:"enable-observability"` + EnableNetworkQoS bool `gcfg:"enable-network-qos"` } // GatewayMode holds the node gateway mode @@ -1141,6 +1142,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableObservability, Value: OVNKubernetesFeature.EnableObservability, }, + &cli.BoolFlag{ + Name: "enable-network-qos", + Usage: "Configure to use NetworkQoS CRD feature with ovn-kubernetes.", + Destination: &cliConfig.OVNKubernetesFeature.EnableNetworkQoS, + Value: OVNKubernetesFeature.EnableNetworkQoS, + }, } // K8sFlags capture Kubernetes-related options diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index 5d0c1fb7a4..06d88c4ce4 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -8,6 +8,7 @@ import ( "github.com/containernetworking/cni/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" @@ -139,8 +140,12 @@ func (cm *ControllerManager) GetDefaultNetworkController() networkmanager.Reconc func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) error { existingNetworksMap := map[string]string{} + validNetworksSubnets := sets.New[string]() for _, network := range validNetworks { existingNetworksMap[network.GetNetworkName()] = network.TopologyType() + for _, subnet := range network.Subnets() { + validNetworksSubnets.Insert(subnet.CIDR.String()) + } } // Get all the existing secondary networks and its logical entities @@ -188,6 +193,29 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) klog.Errorf("Failed to delete stale OVN logical entities for network %s: %v", netName, err) } } + + if util.IsRouteAdvertisementsEnabled() { + // Remove stale subnets from the advertised networks address set used for isolation + // NOTE: network reconciliation will take care of removing the subnets for existing networks that are no longer + // advertised. 
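+ // The cleanup below compares raw CIDR strings: any address currently stored in the
+ // advertised-subnets address set that does not match a subnet of one of the valid
+ // networks passed in is collected and removed from the set.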
+ addressSetFactory := addressset.NewOvnAddressSetFactory(cm.nbClient, config.IPv4Mode, config.IPv6Mode) + advertisedSubnets, err := addressSetFactory.GetAddressSet(ovn.GetAdvertisedNetworkSubnetsAddressSetDBIDs()) + if err != nil { + return fmt.Errorf("failed to get advertised subnets addresset %s: %w", ovn.GetAdvertisedNetworkSubnetsAddressSetDBIDs(), err) + } + v4AdvertisedSubnets, v6AdvertisedSubnets := advertisedSubnets.GetAddresses() + var invalidSubnets []string + for _, subnet := range append(v4AdvertisedSubnets, v6AdvertisedSubnets...) { + if !validNetworksSubnets.Has(subnet) { + klog.Infof("Cleanup stale advertised subnet: %q", subnet) + invalidSubnets = append(invalidSubnets, subnet) + } + } + + if err := advertisedSubnets.DeleteAddresses(invalidSubnets); err != nil { + klog.Errorf("Failed to delete stale advertised subnets: %v", invalidSubnets) + } + } return nil } @@ -210,6 +238,7 @@ func NewControllerManager(ovnClient *util.OVNClientset, wf *factory.WatchFactory APBRouteClient: ovnClient.AdminPolicyRouteClient, EgressQoSClient: ovnClient.EgressQoSClient, IPAMClaimsClient: ovnClient.IPAMClaimsClient, + NetworkQoSClient: ovnClient.NetworkQoSClient, }, stopChan: stopCh, watchFactory: wf, @@ -450,6 +479,11 @@ func (cm *ControllerManager) Start(ctx context.Context) error { return fmt.Errorf("failed to init default network controller: %v", err) } + if util.IsRouteAdvertisementsEnabled() { + if err := cm.configureAdvertisedNetworkIsolation(); err != nil { + return fmt.Errorf("failed to initialize advertised network isolation: %w", err) + } + } if cm.networkManager != nil { if err = cm.networkManager.Start(); err != nil { return fmt.Errorf("failed to start NAD Controller :%v", err) @@ -494,3 +528,9 @@ func (cm *ControllerManager) Stop() { func (cm *ControllerManager) Reconcile(_ string, _, _ util.NetInfo) error { return nil } + +func (cm *ControllerManager) configureAdvertisedNetworkIsolation() error { + addressSetFactory := addressset.NewOvnAddressSetFactory(cm.nbClient, config.IPv4Mode, config.IPv6Mode) + _, err := addressSetFactory.EnsureAddressSet(ovn.GetAdvertisedNetworkSubnetsAddressSetDBIDs()) + return err +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/internal/internal.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/internal/internal.go new file mode 100644 index 0000000000..0370ccbc97 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/internal/internal.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. 
+ +package internal + +import ( + fmt "fmt" + sync "sync" + + typed "sigs.k8s.io/structured-merge-diff/v4/typed" +) + +func Parser() *typed.Parser { + parserOnce.Do(func() { + var err error + parser, err = typed.NewParser(schemaYAML) + if err != nil { + panic(fmt.Sprintf("Failed to parse schema: %v", err)) + } + }) + return parser +} + +var parserOnce sync.Once +var parser *typed.Parser +var schemaYAML = typed.YAMLObject(`types: +- name: __untyped_atomic_ + scalar: untyped + list: + elementType: + namedType: __untyped_atomic_ + elementRelationship: atomic + map: + elementType: + namedType: __untyped_atomic_ + elementRelationship: atomic +- name: __untyped_deduced_ + scalar: untyped + list: + elementType: + namedType: __untyped_atomic_ + elementRelationship: atomic + map: + elementType: + namedType: __untyped_deduced_ + elementRelationship: separable +`) diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/bandwidth.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/bandwidth.go new file mode 100644 index 0000000000..3a00efc01e --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/bandwidth.go @@ -0,0 +1,47 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// BandwidthApplyConfiguration represents a declarative configuration of the Bandwidth type for use +// with apply. +type BandwidthApplyConfiguration struct { + Rate *uint32 `json:"rate,omitempty"` + Burst *uint32 `json:"burst,omitempty"` +} + +// BandwidthApplyConfiguration constructs a declarative configuration of the Bandwidth type for use with +// apply. +func Bandwidth() *BandwidthApplyConfiguration { + return &BandwidthApplyConfiguration{} +} + +// WithRate sets the Rate field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Rate field is set to the value of the last call. +func (b *BandwidthApplyConfiguration) WithRate(value uint32) *BandwidthApplyConfiguration { + b.Rate = &value + return b +} + +// WithBurst sets the Burst field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Burst field is set to the value of the last call. 
+func (b *BandwidthApplyConfiguration) WithBurst(value uint32) *BandwidthApplyConfiguration { + b.Burst = &value + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/classifier.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/classifier.go new file mode 100644 index 0000000000..01c1546427 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/classifier.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" +) + +// ClassifierApplyConfiguration represents a declarative configuration of the Classifier type for use +// with apply. +type ClassifierApplyConfiguration struct { + To []DestinationApplyConfiguration `json:"to,omitempty"` + Ports []*networkqosv1alpha1.Port `json:"ports,omitempty"` +} + +// ClassifierApplyConfiguration constructs a declarative configuration of the Classifier type for use with +// apply. +func Classifier() *ClassifierApplyConfiguration { + return &ClassifierApplyConfiguration{} +} + +// WithTo adds the given value to the To field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the To field. +func (b *ClassifierApplyConfiguration) WithTo(values ...*DestinationApplyConfiguration) *ClassifierApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithTo") + } + b.To = append(b.To, *values[i]) + } + return b +} + +// WithPorts adds the given value to the Ports field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Ports field. +func (b *ClassifierApplyConfiguration) WithPorts(values ...**networkqosv1alpha1.Port) *ClassifierApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithPorts") + } + b.Ports = append(b.Ports, *values[i]) + } + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/destination.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/destination.go new file mode 100644 index 0000000000..49f3c10101 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/destination.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + networkingv1 "k8s.io/api/networking/v1" + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// DestinationApplyConfiguration represents a declarative configuration of the Destination type for use +// with apply. +type DestinationApplyConfiguration struct { + PodSelector *v1.LabelSelectorApplyConfiguration `json:"podSelector,omitempty"` + NamespaceSelector *v1.LabelSelectorApplyConfiguration `json:"namespaceSelector,omitempty"` + IPBlock *networkingv1.IPBlock `json:"ipBlock,omitempty"` +} + +// DestinationApplyConfiguration constructs a declarative configuration of the Destination type for use with +// apply. +func Destination() *DestinationApplyConfiguration { + return &DestinationApplyConfiguration{} +} + +// WithPodSelector sets the PodSelector field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PodSelector field is set to the value of the last call. +func (b *DestinationApplyConfiguration) WithPodSelector(value *v1.LabelSelectorApplyConfiguration) *DestinationApplyConfiguration { + b.PodSelector = value + return b +} + +// WithNamespaceSelector sets the NamespaceSelector field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the NamespaceSelector field is set to the value of the last call. +func (b *DestinationApplyConfiguration) WithNamespaceSelector(value *v1.LabelSelectorApplyConfiguration) *DestinationApplyConfiguration { + b.NamespaceSelector = value + return b +} + +// WithIPBlock sets the IPBlock field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the IPBlock field is set to the value of the last call. +func (b *DestinationApplyConfiguration) WithIPBlock(value networkingv1.IPBlock) *DestinationApplyConfiguration { + b.IPBlock = &value + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/networkqos.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/networkqos.go new file mode 100644 index 0000000000..d1cebcab83 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/networkqos.go @@ -0,0 +1,224 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// NetworkQoSApplyConfiguration represents a declarative configuration of the NetworkQoS type for use +// with apply. +type NetworkQoSApplyConfiguration struct { + v1.TypeMetaApplyConfiguration `json:",inline"` + *v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"` + Spec *SpecApplyConfiguration `json:"spec,omitempty"` + Status *StatusApplyConfiguration `json:"status,omitempty"` +} + +// NetworkQoS constructs a declarative configuration of the NetworkQoS type for use with +// apply. +func NetworkQoS(name, namespace string) *NetworkQoSApplyConfiguration { + b := &NetworkQoSApplyConfiguration{} + b.WithName(name) + b.WithNamespace(namespace) + b.WithKind("NetworkQoS") + b.WithAPIVersion("k8s.ovn.org/v1alpha1") + return b +} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithKind(value string) *NetworkQoSApplyConfiguration { + b.TypeMetaApplyConfiguration.Kind = &value + return b +} + +// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the APIVersion field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithAPIVersion(value string) *NetworkQoSApplyConfiguration { + b.TypeMetaApplyConfiguration.APIVersion = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithName(value string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Name = &value + return b +} + +// WithGenerateName sets the GenerateName field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the GenerateName field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithGenerateName(value string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.GenerateName = &value + return b +} + +// WithNamespace sets the Namespace field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Namespace field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithNamespace(value string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Namespace = &value + return b +} + +// WithUID sets the UID field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the UID field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithUID(value types.UID) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.UID = &value + return b +} + +// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ResourceVersion field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithResourceVersion(value string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.ResourceVersion = &value + return b +} + +// WithGeneration sets the Generation field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Generation field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithGeneration(value int64) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.Generation = &value + return b +} + +// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the CreationTimestamp field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithCreationTimestamp(value metav1.Time) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.CreationTimestamp = &value + return b +} + +// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DeletionTimestamp field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value + return b +} + +// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value + return b +} + +// WithLabels puts the entries into the Labels field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the Labels field, +// overwriting an existing map entries in Labels field with the same key. 
+func (b *NetworkQoSApplyConfiguration) WithLabels(entries map[string]string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.ObjectMetaApplyConfiguration.Labels[k] = v + } + return b +} + +// WithAnnotations puts the entries into the Annotations field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the Annotations field, +// overwriting an existing map entries in Annotations field with the same key. +func (b *NetworkQoSApplyConfiguration) WithAnnotations(entries map[string]string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { + b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.ObjectMetaApplyConfiguration.Annotations[k] = v + } + return b +} + +// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the OwnerReferences field. +func (b *NetworkQoSApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + for i := range values { + if values[i] == nil { + panic("nil value passed to WithOwnerReferences") + } + b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) + } + return b +} + +// WithFinalizers adds the given value to the Finalizers field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Finalizers field. +func (b *NetworkQoSApplyConfiguration) WithFinalizers(values ...string) *NetworkQoSApplyConfiguration { + b.ensureObjectMetaApplyConfigurationExists() + for i := range values { + b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i]) + } + return b +} + +func (b *NetworkQoSApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { + if b.ObjectMetaApplyConfiguration == nil { + b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{} + } +} + +// WithSpec sets the Spec field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Spec field is set to the value of the last call. +func (b *NetworkQoSApplyConfiguration) WithSpec(value *SpecApplyConfiguration) *NetworkQoSApplyConfiguration { + b.Spec = value + return b +} + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Status field is set to the value of the last call. 
+func (b *NetworkQoSApplyConfiguration) WithStatus(value *StatusApplyConfiguration) *NetworkQoSApplyConfiguration { + b.Status = value + return b +} + +// GetName retrieves the value of the Name field in the declarative configuration. +func (b *NetworkQoSApplyConfiguration) GetName() *string { + b.ensureObjectMetaApplyConfigurationExists() + return b.ObjectMetaApplyConfiguration.Name +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/port.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/port.go new file mode 100644 index 0000000000..a828942403 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/port.go @@ -0,0 +1,47 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// PortApplyConfiguration represents a declarative configuration of the Port type for use +// with apply. +type PortApplyConfiguration struct { + Protocol *string `json:"protocol,omitempty"` + Port *int32 `json:"port,omitempty"` +} + +// PortApplyConfiguration constructs a declarative configuration of the Port type for use with +// apply. +func Port() *PortApplyConfiguration { + return &PortApplyConfiguration{} +} + +// WithProtocol sets the Protocol field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Protocol field is set to the value of the last call. +func (b *PortApplyConfiguration) WithProtocol(value string) *PortApplyConfiguration { + b.Protocol = &value + return b +} + +// WithPort sets the Port field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Port field is set to the value of the last call. +func (b *PortApplyConfiguration) WithPort(value int32) *PortApplyConfiguration { + b.Port = &value + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/rule.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/rule.go new file mode 100644 index 0000000000..6d332d3bb2 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/rule.go @@ -0,0 +1,56 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// RuleApplyConfiguration represents a declarative configuration of the Rule type for use +// with apply. +type RuleApplyConfiguration struct { + DSCP *int `json:"dscp,omitempty"` + Classifier *ClassifierApplyConfiguration `json:"classifier,omitempty"` + Bandwidth *BandwidthApplyConfiguration `json:"bandwidth,omitempty"` +} + +// RuleApplyConfiguration constructs a declarative configuration of the Rule type for use with +// apply. +func Rule() *RuleApplyConfiguration { + return &RuleApplyConfiguration{} +} + +// WithDSCP sets the DSCP field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DSCP field is set to the value of the last call. +func (b *RuleApplyConfiguration) WithDSCP(value int) *RuleApplyConfiguration { + b.DSCP = &value + return b +} + +// WithClassifier sets the Classifier field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Classifier field is set to the value of the last call. +func (b *RuleApplyConfiguration) WithClassifier(value *ClassifierApplyConfiguration) *RuleApplyConfiguration { + b.Classifier = value + return b +} + +// WithBandwidth sets the Bandwidth field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Bandwidth field is set to the value of the last call. +func (b *RuleApplyConfiguration) WithBandwidth(value *BandwidthApplyConfiguration) *RuleApplyConfiguration { + b.Bandwidth = value + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/spec.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/spec.go new file mode 100644 index 0000000000..848cbe073d --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/spec.go @@ -0,0 +1,75 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + types "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// SpecApplyConfiguration represents a declarative configuration of the Spec type for use +// with apply. +type SpecApplyConfiguration struct { + NetworkSelectors *types.NetworkSelectors `json:"networkSelectors,omitempty"` + PodSelector *v1.LabelSelectorApplyConfiguration `json:"podSelector,omitempty"` + Priority *int `json:"priority,omitempty"` + Egress []RuleApplyConfiguration `json:"egress,omitempty"` +} + +// SpecApplyConfiguration constructs a declarative configuration of the Spec type for use with +// apply. 
+func Spec() *SpecApplyConfiguration { + return &SpecApplyConfiguration{} +} + +// WithNetworkSelectors sets the NetworkSelectors field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the NetworkSelectors field is set to the value of the last call. +func (b *SpecApplyConfiguration) WithNetworkSelectors(value types.NetworkSelectors) *SpecApplyConfiguration { + b.NetworkSelectors = &value + return b +} + +// WithPodSelector sets the PodSelector field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PodSelector field is set to the value of the last call. +func (b *SpecApplyConfiguration) WithPodSelector(value *v1.LabelSelectorApplyConfiguration) *SpecApplyConfiguration { + b.PodSelector = value + return b +} + +// WithPriority sets the Priority field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Priority field is set to the value of the last call. +func (b *SpecApplyConfiguration) WithPriority(value int) *SpecApplyConfiguration { + b.Priority = &value + return b +} + +// WithEgress adds the given value to the Egress field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Egress field. +func (b *SpecApplyConfiguration) WithEgress(values ...*RuleApplyConfiguration) *SpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithEgress") + } + b.Egress = append(b.Egress, *values[i]) + } + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/status.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/status.go new file mode 100644 index 0000000000..aed88afef0 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1/status.go @@ -0,0 +1,56 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// StatusApplyConfiguration represents a declarative configuration of the Status type for use +// with apply. +type StatusApplyConfiguration struct { + Status *string `json:"status,omitempty"` + Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"` +} + +// StatusApplyConfiguration constructs a declarative configuration of the Status type for use with +// apply. 
+func Status() *StatusApplyConfiguration { + return &StatusApplyConfiguration{} +} + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Status field is set to the value of the last call. +func (b *StatusApplyConfiguration) WithStatus(value string) *StatusApplyConfiguration { + b.Status = &value + return b +} + +// WithConditions adds the given value to the Conditions field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Conditions field. +func (b *StatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *StatusApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithConditions") + } + b.Conditions = append(b.Conditions, *values[i]) + } + return b +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/utils.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/utils.go new file mode 100644 index 0000000000..900d00fd22 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/utils.go @@ -0,0 +1,57 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package applyconfiguration + +import ( + v1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + internal "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/internal" + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + testing "k8s.io/client-go/testing" +) + +// ForKind returns an apply configuration type for the given GroupVersionKind, or nil if no +// apply configuration type exists for the given GroupVersionKind. 
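These generated builders are meant to be chained. A minimal sketch of that pattern, using only the Spec() and Rule() constructors and the With helpers from this apply-configuration package (the nqosapply import alias and the concrete values are illustrative, not part of the generated code):

```go
package main

import (
	"fmt"

	nqosapply "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1"
)

func main() {
	// Chain the generated "With" helpers: priority 10 and a single egress
	// rule that marks matching traffic with DSCP 46.
	spec := nqosapply.Spec().
		WithPriority(10).
		WithEgress(nqosapply.Rule().WithDSCP(46))

	fmt.Printf("priority=%d egress rules=%d\n", *spec.Priority, len(spec.Egress))
}
```

Because every field is a pointer tagged omitempty, a server-side apply request can distinguish a field that was never set from one explicitly set to its zero value.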
+func ForKind(kind schema.GroupVersionKind) interface{} { + switch kind { + // Group=k8s.ovn.org, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithKind("Bandwidth"): + return &networkqosv1alpha1.BandwidthApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Classifier"): + return &networkqosv1alpha1.ClassifierApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Destination"): + return &networkqosv1alpha1.DestinationApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("NetworkQoS"): + return &networkqosv1alpha1.NetworkQoSApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Port"): + return &networkqosv1alpha1.PortApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Rule"): + return &networkqosv1alpha1.RuleApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Spec"): + return &networkqosv1alpha1.SpecApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Status"): + return &networkqosv1alpha1.StatusApplyConfiguration{} + + } + return nil +} + +func NewTypeConverter(scheme *runtime.Scheme) *testing.TypeConverter { + return &testing.TypeConverter{Scheme: scheme, TypeResolver: internal.Parser()} +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/clientset.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/clientset.go new file mode 100644 index 0000000000..df6ec4df46 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/clientset.go @@ -0,0 +1,119 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package versioned + +import ( + fmt "fmt" + http "net/http" + + k8sv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1" + discovery "k8s.io/client-go/discovery" + rest "k8s.io/client-go/rest" + flowcontrol "k8s.io/client-go/util/flowcontrol" +) + +type Interface interface { + Discovery() discovery.DiscoveryInterface + K8sV1alpha1() k8sv1alpha1.K8sV1alpha1Interface +} + +// Clientset contains the clients for groups. +type Clientset struct { + *discovery.DiscoveryClient + k8sV1alpha1 *k8sv1alpha1.K8sV1alpha1Client +} + +// K8sV1alpha1 retrieves the K8sV1alpha1Client +func (c *Clientset) K8sV1alpha1() k8sv1alpha1.K8sV1alpha1Interface { + return c.k8sV1alpha1 +} + +// Discovery retrieves the DiscoveryClient +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + if c == nil { + return nil + } + return c.DiscoveryClient +} + +// NewForConfig creates a new Clientset for the given config. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfig will generate a rate-limiter in configShallowCopy. +// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), +// where httpClient was generated with rest.HTTPClientFor(c). 
+func NewForConfig(c *rest.Config) (*Clientset, error) { + configShallowCopy := *c + + if configShallowCopy.UserAgent == "" { + configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent() + } + + // share the transport between all clients + httpClient, err := rest.HTTPClientFor(&configShallowCopy) + if err != nil { + return nil, err + } + + return NewForConfigAndClient(&configShallowCopy, httpClient) +} + +// NewForConfigAndClient creates a new Clientset for the given config and http client. +// Note the http client provided takes precedence over the configured transport values. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfigAndClient will generate a rate-limiter in configShallowCopy. +func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) { + configShallowCopy := *c + if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { + if configShallowCopy.Burst <= 0 { + return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") + } + configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) + } + + var cs Clientset + var err error + cs.k8sV1alpha1, err = k8sv1alpha1.NewForConfigAndClient(&configShallowCopy, httpClient) + if err != nil { + return nil, err + } + + cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient) + if err != nil { + return nil, err + } + return &cs, nil +} + +// NewForConfigOrDie creates a new Clientset for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *Clientset { + cs, err := NewForConfig(c) + if err != nil { + panic(err) + } + return cs +} + +// New creates a new Clientset for the given RESTClient. +func New(c rest.Interface) *Clientset { + var cs Clientset + cs.k8sV1alpha1 = k8sv1alpha1.New(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClient(c) + return &cs +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/clientset_generated.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/clientset_generated.go new file mode 100644 index 0000000000..b61e9993b1 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/clientset_generated.go @@ -0,0 +1,121 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
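For orientation, this is roughly how a consumer wires up the generated versioned clientset; a sketch assuming an in-cluster rest.Config (clientcmd would be used for an out-of-cluster kubeconfig) and a hypothetical "default" namespace:

```go
package main

import (
	"context"
	"fmt"

	nqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/rest"
)

func main() {
	// Assumes the process runs inside the cluster.
	cfg, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}

	client, err := nqosclientset.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// List NetworkQoS objects in one namespace through the typed group client.
	nqoses, err := client.K8sV1alpha1().NetworkQoSes("default").List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("found %d NetworkQoS objects\n", len(nqoses.Items))
}
```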
+ +package fake + +import ( + applyconfiguration "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration" + clientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + k8sv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1" + fakek8sv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/discovery" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/testing" +) + +// NewSimpleClientset returns a clientset that will respond with the provided objects. +// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, +// without applying any field management, validations and/or defaults. It shouldn't be considered a replacement +// for a real clientset and is mostly useful in simple unit tests. +// +// DEPRECATED: NewClientset replaces this with support for field management, which significantly improves +// server side apply testing. NewClientset is only available when apply configurations are generated (e.g. +// via --with-applyconfig). +func NewSimpleClientset(objects ...runtime.Object) *Clientset { + o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) + for _, obj := range objects { + if err := o.Add(obj); err != nil { + panic(err) + } + } + + cs := &Clientset{tracker: o} + cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} + cs.AddReactor("*", "*", testing.ObjectReaction(o)) + cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := o.Watch(gvr, ns) + if err != nil { + return false, nil, err + } + return true, watch, nil + }) + + return cs +} + +// Clientset implements clientset.Interface. Meant to be embedded into a +// struct to get a default implementation. This makes faking out just the method +// you want to test easier. +type Clientset struct { + testing.Fake + discovery *fakediscovery.FakeDiscovery + tracker testing.ObjectTracker +} + +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + return c.discovery +} + +func (c *Clientset) Tracker() testing.ObjectTracker { + return c.tracker +} + +// NewClientset returns a clientset that will respond with the provided objects. +// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, +// without applying any validations and/or defaults. It shouldn't be considered a replacement +// for a real clientset and is mostly useful in simple unit tests. 
+func NewClientset(objects ...runtime.Object) *Clientset { + o := testing.NewFieldManagedObjectTracker( + scheme, + codecs.UniversalDecoder(), + applyconfiguration.NewTypeConverter(scheme), + ) + for _, obj := range objects { + if err := o.Add(obj); err != nil { + panic(err) + } + } + + cs := &Clientset{tracker: o} + cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} + cs.AddReactor("*", "*", testing.ObjectReaction(o)) + cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := o.Watch(gvr, ns) + if err != nil { + return false, nil, err + } + return true, watch, nil + }) + + return cs +} + +var ( + _ clientset.Interface = &Clientset{} + _ testing.FakeClient = &Clientset{} +) + +// K8sV1alpha1 retrieves the K8sV1alpha1Client +func (c *Clientset) K8sV1alpha1() k8sv1alpha1.K8sV1alpha1Interface { + return &fakek8sv1alpha1.FakeK8sV1alpha1{Fake: &c.Fake} +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/doc.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/doc.go new file mode 100644 index 0000000000..19e0028ffb --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated fake clientset. +package fake diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/register.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/register.go new file mode 100644 index 0000000000..38ba821acf --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
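A sketch of how the fake clientset is typically exercised in a unit test. It assumes the NetworkQoS type embeds metav1.ObjectMeta in the usual way; the package, object name, and namespace are illustrative. NewClientset is the field-managed variant to prefer once server-side apply behavior needs to be tested, per the deprecation note above:

```go
package networkqos_test

import (
	"context"
	"testing"

	nqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1"
	nqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestGetSeededNetworkQoS(t *testing.T) {
	// Seed the object tracker with one pre-existing NetworkQoS.
	seed := &nqosv1alpha1.NetworkQoS{
		ObjectMeta: metav1.ObjectMeta{Name: "gold", Namespace: "default"},
	}
	client := nqosfake.NewSimpleClientset(seed)

	got, err := client.K8sV1alpha1().NetworkQoSes("default").Get(context.TODO(), "gold", metav1.GetOptions{})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got.Namespace != "default" {
		t.Fatalf("unexpected namespace %q", got.Namespace)
	}
}
```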
+ +package fake + +import ( + k8sv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var scheme = runtime.NewScheme() +var codecs = serializer.NewCodecFactory(scheme) + +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1alpha1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(scheme)) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/doc.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/doc.go new file mode 100644 index 0000000000..1aec4021fc --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package contains the scheme of the automatically generated clientset. +package scheme diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/register.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/register.go new file mode 100644 index 0000000000..eb8b8af9d2 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package scheme + +import ( + k8sv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var Scheme = runtime.NewScheme() +var Codecs = serializer.NewCodecFactory(Scheme) +var ParameterCodec = runtime.NewParameterCodec(Scheme) +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1alpha1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(Scheme)) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/doc.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/doc.go new file mode 100644 index 0000000000..0e375e4fc2 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated typed clients. +package v1alpha1 diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/doc.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/doc.go new file mode 100644 index 0000000000..422564f2d5 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// Package fake has the automatically generated clients. 
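As the AddToScheme comment above describes, the generated scheme package is meant to be composed into whatever scheme the consuming binary already uses; a minimal sketch of that composition:

```go
package main

import (
	nqosscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientsetscheme "k8s.io/client-go/kubernetes/scheme"
)

func main() {
	// Register the k8s.ovn.org/v1alpha1 NetworkQoS types into client-go's shared
	// scheme so that RawExtensions in core objects can round-trip them correctly.
	utilruntime.Must(nqosscheme.AddToScheme(clientsetscheme.Scheme))
}
```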
+package fake diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos.go new file mode 100644 index 0000000000..7ccb48963e --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos.go @@ -0,0 +1,50 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1" + typednetworkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1" + gentype "k8s.io/client-go/gentype" +) + +// fakeNetworkQoSes implements NetworkQoSInterface +type fakeNetworkQoSes struct { + *gentype.FakeClientWithListAndApply[*v1alpha1.NetworkQoS, *v1alpha1.NetworkQoSList, *networkqosv1alpha1.NetworkQoSApplyConfiguration] + Fake *FakeK8sV1alpha1 +} + +func newFakeNetworkQoSes(fake *FakeK8sV1alpha1, namespace string) typednetworkqosv1alpha1.NetworkQoSInterface { + return &fakeNetworkQoSes{ + gentype.NewFakeClientWithListAndApply[*v1alpha1.NetworkQoS, *v1alpha1.NetworkQoSList, *networkqosv1alpha1.NetworkQoSApplyConfiguration]( + fake.Fake, + namespace, + v1alpha1.SchemeGroupVersion.WithResource("networkqoses"), + v1alpha1.SchemeGroupVersion.WithKind("NetworkQoS"), + func() *v1alpha1.NetworkQoS { return &v1alpha1.NetworkQoS{} }, + func() *v1alpha1.NetworkQoSList { return &v1alpha1.NetworkQoSList{} }, + func(dst, src *v1alpha1.NetworkQoSList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.NetworkQoSList) []*v1alpha1.NetworkQoS { return gentype.ToPointerSlice(list.Items) }, + func(list *v1alpha1.NetworkQoSList, items []*v1alpha1.NetworkQoS) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos_client.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos_client.go new file mode 100644 index 0000000000..ddfcb9e789 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/fake/fake_networkqos_client.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1" + rest "k8s.io/client-go/rest" + testing "k8s.io/client-go/testing" +) + +type FakeK8sV1alpha1 struct { + *testing.Fake +} + +func (c *FakeK8sV1alpha1) NetworkQoSes(namespace string) v1alpha1.NetworkQoSInterface { + return newFakeNetworkQoSes(c, namespace) +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FakeK8sV1alpha1) RESTClient() rest.Interface { + var ret *rest.RESTClient + return ret +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/generated_expansion.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/generated_expansion.go new file mode 100644 index 0000000000..474127f124 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/generated_expansion.go @@ -0,0 +1,20 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +type NetworkQoSExpansion interface{} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos.go new file mode 100644 index 0000000000..2381822dbe --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos.go @@ -0,0 +1,73 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package v1alpha1 + +import ( + context "context" + + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + applyconfigurationnetworkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1" + scheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// NetworkQoSesGetter has a method to return a NetworkQoSInterface. +// A group's client should implement this interface. +type NetworkQoSesGetter interface { + NetworkQoSes(namespace string) NetworkQoSInterface +} + +// NetworkQoSInterface has methods to work with NetworkQoS resources. +type NetworkQoSInterface interface { + Create(ctx context.Context, networkQoS *networkqosv1alpha1.NetworkQoS, opts v1.CreateOptions) (*networkqosv1alpha1.NetworkQoS, error) + Update(ctx context.Context, networkQoS *networkqosv1alpha1.NetworkQoS, opts v1.UpdateOptions) (*networkqosv1alpha1.NetworkQoS, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, networkQoS *networkqosv1alpha1.NetworkQoS, opts v1.UpdateOptions) (*networkqosv1alpha1.NetworkQoS, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*networkqosv1alpha1.NetworkQoS, error) + List(ctx context.Context, opts v1.ListOptions) (*networkqosv1alpha1.NetworkQoSList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *networkqosv1alpha1.NetworkQoS, err error) + Apply(ctx context.Context, networkQoS *applyconfigurationnetworkqosv1alpha1.NetworkQoSApplyConfiguration, opts v1.ApplyOptions) (result *networkqosv1alpha1.NetworkQoS, err error) + // Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus(). 
+ ApplyStatus(ctx context.Context, networkQoS *applyconfigurationnetworkqosv1alpha1.NetworkQoSApplyConfiguration, opts v1.ApplyOptions) (result *networkqosv1alpha1.NetworkQoS, err error) + NetworkQoSExpansion +} + +// networkQoSes implements NetworkQoSInterface +type networkQoSes struct { + *gentype.ClientWithListAndApply[*networkqosv1alpha1.NetworkQoS, *networkqosv1alpha1.NetworkQoSList, *applyconfigurationnetworkqosv1alpha1.NetworkQoSApplyConfiguration] +} + +// newNetworkQoSes returns a NetworkQoSes +func newNetworkQoSes(c *K8sV1alpha1Client, namespace string) *networkQoSes { + return &networkQoSes{ + gentype.NewClientWithListAndApply[*networkqosv1alpha1.NetworkQoS, *networkqosv1alpha1.NetworkQoSList, *applyconfigurationnetworkqosv1alpha1.NetworkQoSApplyConfiguration]( + "networkqoses", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *networkqosv1alpha1.NetworkQoS { return &networkqosv1alpha1.NetworkQoS{} }, + func() *networkqosv1alpha1.NetworkQoSList { return &networkqosv1alpha1.NetworkQoSList{} }, + ), + } +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos_client.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos_client.go new file mode 100644 index 0000000000..329c642e9c --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1/networkqos_client.go @@ -0,0 +1,106 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + http "net/http" + + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + scheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme" + rest "k8s.io/client-go/rest" +) + +type K8sV1alpha1Interface interface { + RESTClient() rest.Interface + NetworkQoSesGetter +} + +// K8sV1alpha1Client is used to interact with features provided by the k8s.ovn.org group. +type K8sV1alpha1Client struct { + restClient rest.Interface +} + +func (c *K8sV1alpha1Client) NetworkQoSes(namespace string) NetworkQoSInterface { + return newNetworkQoSes(c, namespace) +} + +// NewForConfig creates a new K8sV1alpha1Client for the given config. +// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), +// where httpClient was generated with rest.HTTPClientFor(c). +func NewForConfig(c *rest.Config) (*K8sV1alpha1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + httpClient, err := rest.HTTPClientFor(&config) + if err != nil { + return nil, err + } + return NewForConfigAndClient(&config, httpClient) +} + +// NewForConfigAndClient creates a new K8sV1alpha1Client for the given config and http client. +// Note the http client provided takes precedence over the configured transport values. 
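The typed NetworkQoSInterface above also supports partial updates via Patch. A sketch using a merge patch against the typed group client; the object name, namespace, and the spec.priority field in the patch body are illustrative assumptions rather than anything this diff defines:

```go
package main

import (
	"context"
	"fmt"

	nqostyped "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/typed/networkqos/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/rest"
)

func main() {
	cfg, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	c, err := nqostyped.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// Merge-patch an existing object; the patch body only touches spec.priority.
	patch := []byte(`{"spec":{"priority":20}}`)
	updated, err := c.NetworkQoSes("default").Patch(context.TODO(), "gold",
		types.MergePatchType, patch, metav1.PatchOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Println("patched", updated.Name)
}
```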
+func NewForConfigAndClient(c *rest.Config, h *http.Client) (*K8sV1alpha1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + client, err := rest.RESTClientForConfigAndClient(&config, h) + if err != nil { + return nil, err + } + return &K8sV1alpha1Client{client}, nil +} + +// NewForConfigOrDie creates a new K8sV1alpha1Client for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *K8sV1alpha1Client { + client, err := NewForConfig(c) + if err != nil { + panic(err) + } + return client +} + +// New creates a new K8sV1alpha1Client for the given RESTClient. +func New(c rest.Interface) *K8sV1alpha1Client { + return &K8sV1alpha1Client{c} +} + +func setConfigDefaults(config *rest.Config) error { + gv := networkqosv1alpha1.SchemeGroupVersion + config.GroupVersion = &gv + config.APIPath = "/apis" + config.NegotiatedSerializer = rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs).WithoutConversion() + + if config.UserAgent == "" { + config.UserAgent = rest.DefaultKubernetesUserAgent() + } + + return nil +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *K8sV1alpha1Client) RESTClient() rest.Interface { + if c == nil { + return nil + } + return c.restClient +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/factory.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/factory.go new file mode 100644 index 0000000000..256a9cf5d1 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/factory.go @@ -0,0 +1,261 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + reflect "reflect" + sync "sync" + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces" + networkqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// SharedInformerOption defines the functional option type for SharedInformerFactory. 
+type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory + +type sharedInformerFactory struct { + client versioned.Interface + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc + lock sync.Mutex + defaultResync time.Duration + customResync map[reflect.Type]time.Duration + transform cache.TransformFunc + + informers map[reflect.Type]cache.SharedIndexInformer + // startedInformers is used for tracking which informers have been started. + // This allows Start() to be called multiple times safely. + startedInformers map[reflect.Type]bool + // wg tracks how many goroutines were started. + wg sync.WaitGroup + // shuttingDown is true when Shutdown has been called. It may still be running + // because it needs to wait for goroutines. + shuttingDown bool +} + +// WithCustomResyncConfig sets a custom resync period for the specified informer types. +func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + for k, v := range resyncConfig { + factory.customResync[reflect.TypeOf(k)] = v + } + return factory + } +} + +// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. +func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.tweakListOptions = tweakListOptions + return factory + } +} + +// WithNamespace limits the SharedInformerFactory to the specified namespace. +func WithNamespace(namespace string) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.namespace = namespace + return factory + } +} + +// WithTransform sets a transform on all informers. +func WithTransform(transform cache.TransformFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.transform = transform + return factory + } +} + +// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. +func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync) +} + +// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. +// Listers obtained via this SharedInformerFactory will be subject to the same filters +// as specified here. +// Deprecated: Please use NewSharedInformerFactoryWithOptions instead +func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) +} + +// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. 
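The functional options above let callers scope the factory before any informer is created. A sketch of a namespace- and label-scoped factory; the namespace, label selector, and resync period are hypothetical:

```go
package informerutil

import (
	"time"

	nqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned"
	nqosinformers "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newScopedFactory limits the factory to a single namespace and to objects
// matching a label selector before any informer is instantiated.
func newScopedFactory(client nqosclientset.Interface) nqosinformers.SharedInformerFactory {
	return nqosinformers.NewSharedInformerFactoryWithOptions(client, 30*time.Second,
		nqosinformers.WithNamespace("ovn-kubernetes"),
		nqosinformers.WithTweakListOptions(func(opts *metav1.ListOptions) {
			opts.LabelSelector = "qos-tier=gold" // hypothetical label
		}),
	)
}
```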
+func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { + factory := &sharedInformerFactory{ + client: client, + namespace: v1.NamespaceAll, + defaultResync: defaultResync, + informers: make(map[reflect.Type]cache.SharedIndexInformer), + startedInformers: make(map[reflect.Type]bool), + customResync: make(map[reflect.Type]time.Duration), + } + + // Apply all options + for _, opt := range options { + factory = opt(factory) + } + + return factory +} + +func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { + f.lock.Lock() + defer f.lock.Unlock() + + if f.shuttingDown { + return + } + + for informerType, informer := range f.informers { + if !f.startedInformers[informerType] { + f.wg.Add(1) + // We need a new variable in each loop iteration, + // otherwise the goroutine would use the loop variable + // and that keeps changing. + informer := informer + go func() { + defer f.wg.Done() + informer.Run(stopCh) + }() + f.startedInformers[informerType] = true + } + } +} + +func (f *sharedInformerFactory) Shutdown() { + f.lock.Lock() + f.shuttingDown = true + f.lock.Unlock() + + // Will return immediately if there is nothing to wait for. + f.wg.Wait() +} + +func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { + informers := func() map[reflect.Type]cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informers := map[reflect.Type]cache.SharedIndexInformer{} + for informerType, informer := range f.informers { + if f.startedInformers[informerType] { + informers[informerType] = informer + } + } + return informers + }() + + res := map[reflect.Type]bool{} + for informType, informer := range informers { + res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced) + } + return res +} + +// InformerFor returns the SharedIndexInformer for obj using an internal +// client. +func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informerType := reflect.TypeOf(obj) + informer, exists := f.informers[informerType] + if exists { + return informer + } + + resyncPeriod, exists := f.customResync[informerType] + if !exists { + resyncPeriod = f.defaultResync + } + + informer = newFunc(f.client, resyncPeriod) + informer.SetTransform(f.transform) + f.informers[informerType] = informer + + return informer +} + +// SharedInformerFactory provides shared informers for resources in all known +// API group versions. +// +// It is typically used like this: +// +// ctx, cancel := context.Background() +// defer cancel() +// factory := NewSharedInformerFactory(client, resyncPeriod) +// defer factory.WaitForStop() // Returns immediately if nothing was started. +// genericInformer := factory.ForResource(resource) +// typedInformer := factory.SomeAPIGroup().V1().SomeType() +// factory.Start(ctx.Done()) // Start processing these informers. +// synced := factory.WaitForCacheSync(ctx.Done()) +// for v, ok := range synced { +// if !ok { +// fmt.Fprintf(os.Stderr, "caches failed to sync: %v", v) +// return +// } +// } +// +// // Creating informers can also be created after Start, but then +// // Start must be called again: +// anotherGenericInformer := factory.ForResource(resource) +// factory.Start(ctx.Done()) +type SharedInformerFactory interface { + internalinterfaces.SharedInformerFactory + + // Start initializes all requested informers. 
They are handled in goroutines + // which run until the stop channel gets closed. + // Warning: Start does not block. When run in a go-routine, it will race with a later WaitForCacheSync. + Start(stopCh <-chan struct{}) + + // Shutdown marks a factory as shutting down. At that point no new + // informers can be started anymore and Start will return without + // doing anything. + // + // In addition, Shutdown blocks until all goroutines have terminated. For that + // to happen, the close channel(s) that they were started with must be closed, + // either before Shutdown gets called or while it is waiting. + // + // Shutdown may be called multiple times, even concurrently. All such calls will + // block until all goroutines have terminated. + Shutdown() + + // WaitForCacheSync blocks until all started informers' caches were synced + // or the stop channel gets closed. + WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool + + // ForResource gives generic access to a shared informer of the matching type. + ForResource(resource schema.GroupVersionResource) (GenericInformer, error) + + // InformerFor returns the SharedIndexInformer for obj using an internal + // client. + InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer + + K8s() networkqos.Interface +} + +func (f *sharedInformerFactory) K8s() networkqos.Interface { + return networkqos.New(f, f.namespace, f.tweakListOptions) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/generic.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/generic.go new file mode 100644 index 0000000000..53377ad262 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/generic.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + fmt "fmt" + + v1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// GenericInformer is type of SharedIndexInformer which will locate and delegate to other +// sharedInformers based on type +type GenericInformer interface { + Informer() cache.SharedIndexInformer + Lister() cache.GenericLister +} + +type genericInformer struct { + informer cache.SharedIndexInformer + resource schema.GroupResource +} + +// Informer returns the SharedIndexInformer. +func (f *genericInformer) Informer() cache.SharedIndexInformer { + return f.informer +} + +// Lister returns the GenericLister. 
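End to end, the factory, informer, and lister generated here are wired together roughly as follows; the event handler, namespace, and 30-second resync are illustrative, and error handling is trimmed to keep the sketch short:

```go
package main

import (
	"fmt"
	"time"

	nqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned"
	nqosinformers "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
)

func main() {
	cfg, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	client, err := nqosclientset.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	stopCh := make(chan struct{})
	defer close(stopCh)

	factory := nqosinformers.NewSharedInformerFactory(client, 30*time.Second)

	// Register an event handler before starting the factory.
	informer := factory.K8s().V1alpha1().NetworkQoSes().Informer()
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) { fmt.Println("NetworkQoS added") },
	})

	factory.Start(stopCh)
	factory.WaitForCacheSync(stopCh)

	// Reads go through the lister, i.e. the local cache, not the API server.
	lister := factory.K8s().V1alpha1().NetworkQoSes().Lister()
	cached, err := lister.NetworkQoSes("default").List(labels.Everything())
	if err != nil {
		panic(err)
	}
	fmt.Printf("cached NetworkQoS objects: %d\n", len(cached))
}
```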
+func (f *genericInformer) Lister() cache.GenericLister { + return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) +} + +// ForResource gives generic access to a shared informer of the matching type +// TODO extend this to unknown resources with a client pool +func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { + switch resource { + // Group=k8s.ovn.org, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithResource("networkqoses"): + return &genericInformer{resource: resource.GroupResource(), informer: f.K8s().V1alpha1().NetworkQoSes().Informer()}, nil + + } + + return nil, fmt.Errorf("no informer found for %v", resource) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go new file mode 100644 index 0000000000..b53e7b5784 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package internalinterfaces + +import ( + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + cache "k8s.io/client-go/tools/cache" +) + +// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. +type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer + +// SharedInformerFactory a small interface to allow for adding an informer without an import cycle +type SharedInformerFactory interface { + Start(stopCh <-chan struct{}) + InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer +} + +// TweakListOptionsFunc is a function that transforms a v1.ListOptions. +type TweakListOptionsFunc func(*v1.ListOptions) diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/interface.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/interface.go new file mode 100644 index 0000000000..089ac8d769 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/interface.go @@ -0,0 +1,45 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package networkqos + +import ( + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces" + v1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1" +) + +// Interface provides access to each of this group's versions. +type Interface interface { + // V1alpha1 provides access to shared informers for resources in V1alpha1. + V1alpha1() v1alpha1.Interface +} + +type group struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// V1alpha1 returns a new v1alpha1.Interface. +func (g *group) V1alpha1() v1alpha1.Interface { + return v1alpha1.New(g.factory, g.namespace, g.tweakListOptions) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/interface.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/interface.go new file mode 100644 index 0000000000..8ba24801e1 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/interface.go @@ -0,0 +1,44 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to all the informers in this group version. +type Interface interface { + // NetworkQoSes returns a NetworkQoSInformer. + NetworkQoSes() NetworkQoSInformer +} + +type version struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// NetworkQoSes returns a NetworkQoSInformer. 
+func (v *version) NetworkQoSes() NetworkQoSInformer { + return &networkQoSInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/networkqos.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/networkqos.go new file mode 100644 index 0000000000..a1896ab27b --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1/networkqos.go @@ -0,0 +1,89 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + context "context" + time "time" + + crdnetworkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/internalinterfaces" + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// NetworkQoSInformer provides access to a shared informer and lister for +// NetworkQoSes. +type NetworkQoSInformer interface { + Informer() cache.SharedIndexInformer + Lister() networkqosv1alpha1.NetworkQoSLister +} + +type networkQoSInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewNetworkQoSInformer constructs a new informer for NetworkQoS type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewNetworkQoSInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredNetworkQoSInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredNetworkQoSInformer constructs a new informer for NetworkQoS type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredNetworkQoSInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1alpha1().NetworkQoSes(namespace).List(context.TODO(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1alpha1().NetworkQoSes(namespace).Watch(context.TODO(), options) + }, + }, + &crdnetworkqosv1alpha1.NetworkQoS{}, + resyncPeriod, + indexers, + ) +} + +func (f *networkQoSInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredNetworkQoSInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *networkQoSInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&crdnetworkqosv1alpha1.NetworkQoS{}, f.defaultInformer) +} + +func (f *networkQoSInformer) Lister() networkqosv1alpha1.NetworkQoSLister { + return networkqosv1alpha1.NewNetworkQoSLister(f.Informer().GetIndexer()) +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/expansion_generated.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/expansion_generated.go new file mode 100644 index 0000000000..baf31eb955 --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/expansion_generated.go @@ -0,0 +1,26 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +// NetworkQoSListerExpansion allows custom methods to be added to +// NetworkQoSLister. +type NetworkQoSListerExpansion interface{} + +// NetworkQoSNamespaceListerExpansion allows custom methods to be added to +// NetworkQoSNamespaceLister. +type NetworkQoSNamespaceListerExpansion interface{} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/networkqos.go b/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/networkqos.go new file mode 100644 index 0000000000..acaa632fdb --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1/networkqos.go @@ -0,0 +1,69 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" +) + +// NetworkQoSLister helps list NetworkQoSes. +// All objects returned here must be treated as read-only. +type NetworkQoSLister interface { + // List lists all NetworkQoSes in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*networkqosv1alpha1.NetworkQoS, err error) + // NetworkQoSes returns an object that can list and get NetworkQoSes. + NetworkQoSes(namespace string) NetworkQoSNamespaceLister + NetworkQoSListerExpansion +} + +// networkQoSLister implements the NetworkQoSLister interface. +type networkQoSLister struct { + listers.ResourceIndexer[*networkqosv1alpha1.NetworkQoS] +} + +// NewNetworkQoSLister returns a new NetworkQoSLister. +func NewNetworkQoSLister(indexer cache.Indexer) NetworkQoSLister { + return &networkQoSLister{listers.New[*networkqosv1alpha1.NetworkQoS](indexer, networkqosv1alpha1.Resource("networkqos"))} +} + +// NetworkQoSes returns an object that can list and get NetworkQoSes. +func (s *networkQoSLister) NetworkQoSes(namespace string) NetworkQoSNamespaceLister { + return networkQoSNamespaceLister{listers.NewNamespaced[*networkqosv1alpha1.NetworkQoS](s.ResourceIndexer, namespace)} +} + +// NetworkQoSNamespaceLister helps list and get NetworkQoSes. +// All objects returned here must be treated as read-only. +type NetworkQoSNamespaceLister interface { + // List lists all NetworkQoSes in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*networkqosv1alpha1.NetworkQoS, err error) + // Get retrieves the NetworkQoS from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*networkqosv1alpha1.NetworkQoS, error) + NetworkQoSNamespaceListerExpansion +} + +// networkQoSNamespaceLister implements the NetworkQoSNamespaceLister +// interface. 
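Editor's illustration (not part of the generated code): a minimal sketch of how the clientset, shared informer factory and lister generated in this patch might be wired together. The function name, the namespace argument and the 30-minute resync period are assumptions; the factory/lister calls themselves mirror the generated API shown above.

	import (
		"fmt"
		"time"

		"k8s.io/apimachinery/pkg/labels"
		"k8s.io/client-go/tools/cache"

		networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1"
		versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned"
		externalversions "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions"
	)

	// listNetworkQoS starts a shared NetworkQoS informer and returns the cached
	// objects for the given namespace.
	func listNetworkQoS(client versioned.Interface, namespace string, stopCh <-chan struct{}) ([]*networkqosv1alpha1.NetworkQoS, error) {
		factory := externalversions.NewSharedInformerFactory(client, 30*time.Minute)
		nqosInformer := factory.K8s().V1alpha1().NetworkQoSes()
		// Start the factory and wait for the initial List/Watch to fill the cache.
		factory.Start(stopCh)
		if !cache.WaitForCacheSync(stopCh, nqosInformer.Informer().HasSynced) {
			return nil, fmt.Errorf("timed out waiting for the NetworkQoS cache to sync")
		}
		// Objects returned by the lister are shared and must be treated as read-only.
		return nqosInformer.Lister().NetworkQoSes(namespace).List(labels.Everything())
	}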
+type networkQoSNamespaceLister struct {
+	listers.ResourceIndexer[*networkqosv1alpha1.NetworkQoS]
+}
diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/doc.go b/go-controller/pkg/crd/networkqos/v1alpha1/doc.go
new file mode 100644
index 0000000000..4d09443215
--- /dev/null
+++ b/go-controller/pkg/crd/networkqos/v1alpha1/doc.go
@@ -0,0 +1,4 @@
+// Package v1alpha1 contains API Schema definitions for the networkqos v1alpha1 API group
+// +k8s:deepcopy-gen=package
+// +groupName=k8s.ovn.org
+package v1alpha1
diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/register.go b/go-controller/pkg/crd/networkqos/v1alpha1/register.go
new file mode 100644
index 0000000000..21c80fdb89
--- /dev/null
+++ b/go-controller/pkg/crd/networkqos/v1alpha1/register.go
@@ -0,0 +1,34 @@
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+)
+
+var (
+	GroupName          = "k8s.ovn.org"
+	SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"}
+	SchemeBuilder      = runtime.NewSchemeBuilder(addKnownTypes)
+	AddToScheme        = SchemeBuilder.AddToScheme
+)
+
+// Kind takes an unqualified kind and returns back a Group qualified GroupKind
+func Kind(kind string) schema.GroupKind {
+	return SchemeGroupVersion.WithKind(kind).GroupKind()
+}
+
+// Resource takes an unqualified resource and returns a Group qualified GroupResource
+func Resource(resource string) schema.GroupResource {
+	return SchemeGroupVersion.WithResource(resource).GroupResource()
+}
+
+// Adds the list of known types to api.Scheme.
+func addKnownTypes(scheme *runtime.Scheme) error {
+	scheme.AddKnownTypes(SchemeGroupVersion,
+		&NetworkQoS{},
+		&NetworkQoSList{},
+	)
+	metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
+	return nil
+}
diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/types.go b/go-controller/pkg/crd/networkqos/v1alpha1/types.go
new file mode 100644
index 0000000000..53ee00a712
--- /dev/null
+++ b/go-controller/pkg/crd/networkqos/v1alpha1/types.go
@@ -0,0 +1,186 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	networkingv1 "k8s.io/api/networking/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types"
+)
+
+// +genclient
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+// +kubebuilder:resource:path=networkqoses
+// +kubebuilder::singular=networkqos
+// +kubebuilder:object:root=true
+// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=".status.status"
+// +kubebuilder:subresource:status
+// NetworkQoS is a CRD that allows the user to define a DSCP marking and metering
+// for the ingress/egress traffic of pods in its namespace to specified CIDRs,
+// protocol and port. Traffic belonging to these pods will be checked against
+// each Rule in the namespace's NetworkQoS, and if there is a match the traffic
+// is marked with the relevant DSCP value and the specified policing
+// parameters are enforced.
+type NetworkQoS struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   Spec   `json:"spec,omitempty"`
+	Status Status `json:"status,omitempty"`
+}
+
+// Spec defines the desired state of NetworkQoS
+type Spec struct {
+	// networkSelectors selects the networks whose pod IPs need to be added to the source address set.
+	// The NetworkQoS controller currently supports the `NetworkAttachmentDefinitions` type only.
+	// +optional
+	// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="networkSelector is immutable"
+	// +kubebuilder:validation:XValidation:rule="self.all(sel, sel.networkSelectionType == 'ClusterUserDefinedNetworks' || sel.networkSelectionType == 'NetworkAttachmentDefinitions')", message="Unsupported network selection type"
+	NetworkSelectors crdtypes.NetworkSelectors `json:"networkSelectors,omitempty"`
+
+	// podSelector applies the NetworkQoS rule only to the pods in the namespace whose labels
+	// match this definition. This field is optional; if it is not set,
+	// the rule is applied to all pods in the namespace.
+	// +optional
+	PodSelector metav1.LabelSelector `json:"podSelector,omitempty"`
+
+	// priority is a value from 0 to 100 and represents the NetworkQoS' priority.
+	// QoSes with a numerically higher priority take precedence over those with a lower one.
+	// +kubebuilder:validation:Maximum:=100
+	// +kubebuilder:validation:Minimum:=0
+	Priority int `json:"priority"`
+
+	// egress is a collection of Egress NetworkQoS rule objects. A total of 20 rules will
+	// be allowed in each NetworkQoS instance. The relative precedence of egress rules
+	// within a single NetworkQoS object (all of which share the priority) will be
+	// determined by the order in which the rules are written. Thus, a rule that appears
+	// first in the list of egress rules takes the lower precedence.
+	// +kubebuilder:validation:MaxItems=20
+	Egress []Rule `json:"egress"`
+}
+
+type Rule struct {
+	// dscp marking value for matching pods' traffic.
+	// +kubebuilder:validation:Maximum:=63
+	// +kubebuilder:validation:Minimum:=0
+	DSCP int `json:"dscp"`
+
+	// classifier specifies the match criteria that packets must satisfy
+	// for the NetworkQoS Rule to apply.
+	// This field is optional; if it is not set, the rule is applied
+	// to all egress traffic regardless of the destination.
+	// +optional
+	Classifier Classifier `json:"classifier"`
+
+	// +optional
+	Bandwidth Bandwidth `json:"bandwidth"`
+}
+
+type Classifier struct {
+	// +optional
+	To []Destination `json:"to"`
+
+	// +optional
+	Ports []*Port `json:"ports"`
+}
+
+// Bandwidth controls the maximum rate of traffic that can be sent
+// or received for the matching packets.
+type Bandwidth struct {
+	// rate is the value of the rate limit, in kbps. Traffic over the limit
+	// will be dropped.
+	// +kubebuilder:validation:Minimum:=1
+	// +kubebuilder:validation:Maximum:=4294967295
+	// +optional
+	Rate uint32 `json:"rate"`
+
+	// burst is the value of the burst rate limit, in kilobits.
+	// It also requires rate to be specified.
+	// +kubebuilder:validation:Minimum:=1
+	// +kubebuilder:validation:Maximum:=4294967295
+	// +optional
+	Burst uint32 `json:"burst"`
+}
+
+// Port specifies the destination protocol and port on which the NetworkQoS
+// rule is applied
+type Port struct {
+	// protocol (tcp, udp, sctp) that the traffic must match.
+ // +kubebuilder:validation:Pattern=^TCP|UDP|SCTP$ + // +optional + Protocol string `json:"protocol"` + + // port that the traffic must match + // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=65535 + // +optional + Port *int32 `json:"port"` +} + +// Destination describes a peer to apply NetworkQoS configuration for the outgoing traffic. +// Only certain combinations of fields are allowed. +// +kubebuilder:validation:XValidation:rule="!(has(self.ipBlock) && (has(self.podSelector) || has(self.namespaceSelector)))",message="Can't specify both podSelector/namespaceSelector and ipBlock" +type Destination struct { + // podSelector is a label selector which selects pods. This field follows standard label + // selector semantics; if present but empty, it selects all pods. + // + // If namespaceSelector is also set, then the NetworkQoS as a whole selects + // the pods matching podSelector in the Namespaces selected by NamespaceSelector. + // Otherwise it selects the pods matching podSelector in the NetworkQoS's own namespace. + // +optional + PodSelector *metav1.LabelSelector `json:"podSelector,omitempty" protobuf:"bytes,1,opt,name=podSelector"` + + // namespaceSelector selects namespaces using cluster-scoped labels. This field follows + // standard label selector semantics; if present but empty, it selects all namespaces. + // + // If podSelector is also set, then the NetworkQoS as a whole selects + // the pods matching podSelector in the namespaces selected by namespaceSelector. + // Otherwise it selects all pods in the namespaces selected by namespaceSelector. + // +optional + NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty" protobuf:"bytes,2,opt,name=namespaceSelector"` + + // ipBlock defines policy on a particular IPBlock. If this field is set then + // neither of the other fields can be. + // +optional + IPBlock *networkingv1.IPBlock `json:"ipBlock,omitempty" protobuf:"bytes,3,rep,name=ipBlock"` +} + +// Status defines the observed state of NetworkQoS +type Status struct { + // A concise indication of whether the NetworkQoS resource is applied with success. + // +optional + Status string `json:"status,omitempty"` + + // An array of condition objects indicating details about status of NetworkQoS object. + // +optional + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:resource:path=networkqoses +// +kubebuilder::singular=networkqos +// NetworkQoSList contains a list of NetworkQoS +type NetworkQoSList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []NetworkQoS `json:"items"` +} diff --git a/go-controller/pkg/crd/networkqos/v1alpha1/zz_generated.deepcopy.go b/go-controller/pkg/crd/networkqos/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000000..720119ff8a --- /dev/null +++ b/go-controller/pkg/crd/networkqos/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,263 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
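Editor's illustration (not part of the patch): a NetworkQoS object built from the API types defined above, marking UDP traffic from selected pods towards one CIDR with DSCP 46 and rate-limiting it. The object name, namespace, labels, CIDR and numeric values are made up for the example.

	import (
		networkingv1 "k8s.io/api/networking/v1"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

		networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1"
	)

	func exampleNetworkQoS() *networkqosv1alpha1.NetworkQoS {
		port := int32(5000)
		return &networkqosv1alpha1.NetworkQoS{
			ObjectMeta: metav1.ObjectMeta{Name: "mark-stream", Namespace: "default"},
			Spec: networkqosv1alpha1.Spec{
				// Only pods labelled app=stream in the "default" namespace are matched.
				PodSelector: metav1.LabelSelector{MatchLabels: map[string]string{"app": "stream"}},
				Priority:    10,
				Egress: []networkqosv1alpha1.Rule{{
					// Mark matching egress packets with DSCP 46 ...
					DSCP: 46,
					Classifier: networkqosv1alpha1.Classifier{
						To: []networkqosv1alpha1.Destination{{
							IPBlock: &networkingv1.IPBlock{CIDR: "203.0.113.0/24"},
						}},
						Ports: []*networkqosv1alpha1.Port{{Protocol: "UDP", Port: &port}},
					},
					// ... and police them to 20000 kbps with a 400 kilobit burst.
					Bandwidth: networkqosv1alpha1.Bandwidth{Rate: 20000, Burst: 400},
				}},
			},
		}
	}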
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + types "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + networkingv1 "k8s.io/api/networking/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Bandwidth) DeepCopyInto(out *Bandwidth) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Bandwidth. +func (in *Bandwidth) DeepCopy() *Bandwidth { + if in == nil { + return nil + } + out := new(Bandwidth) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Classifier) DeepCopyInto(out *Classifier) { + *out = *in + if in.To != nil { + in, out := &in.To, &out.To + *out = make([]Destination, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Ports != nil { + in, out := &in.Ports, &out.Ports + *out = make([]*Port, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Port) + (*in).DeepCopyInto(*out) + } + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Classifier. +func (in *Classifier) DeepCopy() *Classifier { + if in == nil { + return nil + } + out := new(Classifier) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Destination) DeepCopyInto(out *Destination) { + *out = *in + if in.PodSelector != nil { + in, out := &in.PodSelector, &out.PodSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.NamespaceSelector != nil { + in, out := &in.NamespaceSelector, &out.NamespaceSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.IPBlock != nil { + in, out := &in.IPBlock, &out.IPBlock + *out = new(networkingv1.IPBlock) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Destination. +func (in *Destination) DeepCopy() *Destination { + if in == nil { + return nil + } + out := new(Destination) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NetworkQoS) DeepCopyInto(out *NetworkQoS) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkQoS. +func (in *NetworkQoS) DeepCopy() *NetworkQoS { + if in == nil { + return nil + } + out := new(NetworkQoS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *NetworkQoS) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NetworkQoSList) DeepCopyInto(out *NetworkQoSList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NetworkQoS, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkQoSList. +func (in *NetworkQoSList) DeepCopy() *NetworkQoSList { + if in == nil { + return nil + } + out := new(NetworkQoSList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NetworkQoSList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Port) DeepCopyInto(out *Port) { + *out = *in + if in.Port != nil { + in, out := &in.Port, &out.Port + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port. +func (in *Port) DeepCopy() *Port { + if in == nil { + return nil + } + out := new(Port) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Rule) DeepCopyInto(out *Rule) { + *out = *in + in.Classifier.DeepCopyInto(&out.Classifier) + out.Bandwidth = in.Bandwidth + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Rule. +func (in *Rule) DeepCopy() *Rule { + if in == nil { + return nil + } + out := new(Rule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Spec) DeepCopyInto(out *Spec) { + *out = *in + if in.NetworkSelectors != nil { + in, out := &in.NetworkSelectors, &out.NetworkSelectors + *out = make(types.NetworkSelectors, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.PodSelector.DeepCopyInto(&out.PodSelector) + if in.Egress != nil { + in, out := &in.Egress, &out.Egress + *out = make([]Rule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Spec. +func (in *Spec) DeepCopy() *Spec { + if in == nil { + return nil + } + out := new(Spec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Status) DeepCopyInto(out *Status) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Status. 
+func (in *Status) DeepCopy() *Status { + if in == nil { + return nil + } + out := new(Status) + in.DeepCopyInto(out) + return out +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index f5d6b539de..b3277ea9cf 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -81,6 +81,11 @@ import ( egressservicescheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/scheme" egressserviceinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions" egressserviceinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/informers/externalversions/egressservice/v1" + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/scheme" + networkqosinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions" + networkqosinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1" + networkqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1" routeadvertisementsapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1" routeadvertisementsscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/scheme" routeadvertisementsinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/informers/externalversions" @@ -118,6 +123,7 @@ type WatchFactory struct { udnFactory userdefinednetworkapiinformerfactory.SharedInformerFactory raFactory routeadvertisementsinformerfactory.SharedInformerFactory frrFactory frrinformerfactory.SharedInformerFactory + networkQoSFactory networkqosinformerfactory.SharedInformerFactory informers map[reflect.Type]*informer stopChan chan struct{} @@ -145,6 +151,7 @@ func (wf *WatchFactory) ShallowClone() *WatchFactory { udnFactory: wf.udnFactory, raFactory: wf.raFactory, frrFactory: wf.frrFactory, + networkQoSFactory: wf.networkQoSFactory, informers: wf.informers, stopChan: wf.stopChan, @@ -239,6 +246,7 @@ var ( IPAMClaimsType reflect.Type = reflect.TypeOf(&ipamclaimsapi.IPAMClaim{}) UserDefinedNetworkType reflect.Type = reflect.TypeOf(&userdefinednetworkapi.UserDefinedNetwork{}) ClusterUserDefinedNetworkType reflect.Type = reflect.TypeOf(&userdefinednetworkapi.ClusterUserDefinedNetwork{}) + NetworkQoSType reflect.Type = reflect.TypeOf(&networkqosapi.NetworkQoS{}) // Resource types used in ovnk node NamespaceExGwType reflect.Type = reflect.TypeOf(&namespaceExGw{}) @@ -306,6 +314,7 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient mnpFactory: mnpinformerfactory.NewSharedInformerFactory(ovnClientset.MultiNetworkPolicyClient, resyncInterval), egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), apbRouteFactory: adminbasedpolicyinformerfactory.NewSharedInformerFactory(ovnClientset.AdminPolicyRouteClient, resyncInterval), + networkQoSFactory: networkqosinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkQoSClient, resyncInterval), informers: make(map[reflect.Type]*informer), stopChan: 
make(chan struct{}), } @@ -351,6 +360,10 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient return nil, err } + if err := networkqosapi.AddToScheme(networkqosscheme.Scheme); err != nil { + return nil, err + } + // For Services and Endpoints, pre-populate the shared Informer with one that // has a label selector excluding headless services. wf.iFactory.InformerFor(&corev1.Service{}, func(c kubernetes.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { @@ -501,6 +514,14 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient wf.raFactory.K8s().V1().RouteAdvertisements().Informer() } + if config.OVNKubernetesFeature.EnableNetworkQoS { + wf.informers[NetworkQoSType], err = newQueuedInformer(eventQueueSize, NetworkQoSType, + wf.networkQoSFactory.K8s().V1alpha1().NetworkQoSes().Informer(), wf.stopChan, minNumEventQueues) + if err != nil { + return nil, err + } + } + return wf, nil } @@ -608,6 +629,15 @@ func (wf *WatchFactory) Start() error { } } + if config.OVNKubernetesFeature.EnableNetworkQoS && wf.networkQoSFactory != nil { + wf.networkQoSFactory.Start(wf.stopChan) + for oType, synced := range waitForCacheSyncWithTimeout(wf.networkQoSFactory, wf.stopChan) { + if !synced { + return fmt.Errorf("error in syncing cache for %v informer", oType) + } + } + } + if util.IsNetworkSegmentationSupportEnabled() && wf.udnFactory != nil { wf.udnFactory.Start(wf.stopChan) for oType, synced := range waitForCacheSyncWithTimeout(wf.udnFactory, wf.stopChan) { @@ -635,6 +665,15 @@ func (wf *WatchFactory) Start() error { } } + if config.OVNKubernetesFeature.EnableNetworkQoS && wf.networkQoSFactory != nil { + wf.networkQoSFactory.Start(wf.stopChan) + for oType, synced := range waitForCacheSyncWithTimeout(wf.networkQoSFactory, wf.stopChan) { + if !synced { + return fmt.Errorf("error in syncing cache for %v informer", oType) + } + } + } + return nil } @@ -680,9 +719,14 @@ func (wf *WatchFactory) Stop() { if wf.raFactory != nil { wf.raFactory.Shutdown() } + if wf.frrFactory != nil { wf.frrFactory.Shutdown() } + + if wf.networkQoSFactory != nil { + wf.networkQoSFactory.Shutdown() + } } // NewNodeWatchFactory initializes a watch factory with significantly fewer @@ -869,6 +913,7 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset dnsFactory: ocpnetworkinformerfactory.NewSharedInformerFactoryWithOptions(ovnClientset.OCPNetworkClient, resyncInterval, ocpnetworkinformerfactory.WithNamespace(config.Kubernetes.OVNConfigNamespace)), apbRouteFactory: adminbasedpolicyinformerfactory.NewSharedInformerFactory(ovnClientset.AdminPolicyRouteClient, resyncInterval), egressQoSFactory: egressqosinformerfactory.NewSharedInformerFactory(ovnClientset.EgressQoSClient, resyncInterval), + networkQoSFactory: networkqosinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkQoSClient, resyncInterval), informers: make(map[reflect.Type]*informer), stopChan: make(chan struct{}), } @@ -1147,6 +1192,10 @@ func getObjectMeta(objType reflect.Type, obj interface{}) (*metav1.ObjectMeta, e if cudn, ok := obj.(*userdefinednetworkapi.ClusterUserDefinedNetwork); ok { return &cudn.ObjectMeta, nil } + case NetworkQoSType: + if networkQoS, ok := obj.(*networkqosapi.NetworkQoS); ok { + return &networkQoS.ObjectMeta, nil + } } return nil, fmt.Errorf("cannot get ObjectMeta from type %v", objType) @@ -1413,6 +1462,11 @@ func (wf *WatchFactory) RemoveBaselineAdminNetworkPolicyHandler(handler *Handler wf.removeHandler(BaselineAdminNetworkPolicyType, 
handler) } +// RemoveNetworkQoSHandler removes an NetworkQoS object event handler function +func (wf *WatchFactory) RemoveNetworkQoSHandler(handler *Handler) { + wf.removeHandler(NetworkQoSType, handler) +} + // AddNetworkAttachmentDefinitionHandler adds a handler function that will be executed on NetworkAttachmentDefinition object changes func (wf *WatchFactory) AddNetworkAttachmentDefinitionHandler(handlerFuncs cache.ResourceEventHandler, processExisting func([]interface{}) error) (*Handler, error) { return wf.addHandler(NetworkAttachmentDefinitionType, "", nil, handlerFuncs, processExisting, defaultHandlerPriority) @@ -1636,6 +1690,11 @@ func (wf *WatchFactory) GetEgressFirewall(namespace, name string) (*egressfirewa return egressFirewallLister.EgressFirewalls(namespace).Get(name) } +func (wf *WatchFactory) GetNetworkQoSes() ([]*networkqosapi.NetworkQoS, error) { + networkQosLister := wf.informers[NetworkQoSType].lister.(networkqoslister.NetworkQoSLister) + return networkQosLister.List(labels.Everything()) +} + func (wf *WatchFactory) CertificateSigningRequestInformer() certificatesinformers.CertificateSigningRequestInformer { return wf.iFactory.Certificates().V1().CertificateSigningRequests() } @@ -1760,6 +1819,10 @@ func (wf *WatchFactory) FRRConfigurationsInformer() frrinformer.FRRConfiguration return wf.frrFactory.Api().V1beta1().FRRConfigurations() } +func (wf *WatchFactory) NetworkQoSInformer() networkqosinformer.NetworkQoSInformer { + return wf.networkQoSFactory.K8s().V1alpha1().NetworkQoSes() +} + // withServiceNameAndNoHeadlessServiceSelector returns a LabelSelector (added to the // watcher for EndpointSlices) that will only choose EndpointSlices with a non-empty // "kubernetes.io/service-name" label and without "service.kubernetes.io/headless" diff --git a/go-controller/pkg/factory/factory_test.go b/go-controller/pkg/factory/factory_test.go index 7ca9951356..a2bcf974c3 100644 --- a/go-controller/pkg/factory/factory_test.go +++ b/go-controller/pkg/factory/factory_test.go @@ -38,6 +38,9 @@ import ( egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + networkqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" . 
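Editor's sketch (not part of the patch): how a consumer might use the new watch-factory accessors once EnableNetworkQoS is set and the factory has been started. The function name and log messages are illustrative; the imports assumed here (factory, klog, cache, and the networkqosapi alias) mirror the ones already used in factory.go.

	// dumpNetworkQoSes lists the cached NetworkQoS objects and attaches a simple
	// add handler to the shared informer.
	func dumpNetworkQoSes(wf *factory.WatchFactory) error {
		// Snapshot of the shared cache; available after wf.Start() has synced.
		qoses, err := wf.GetNetworkQoSes()
		if err != nil {
			return err
		}
		for _, q := range qoses {
			klog.Infof("NetworkQoS %s/%s priority=%d egress rules=%d",
				q.Namespace, q.Name, q.Spec.Priority, len(q.Spec.Egress))
		}
		// Event-driven consumers can hang a handler off the shared informer instead.
		_, err = wf.NetworkQoSInformer().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				q := obj.(*networkqosapi.NetworkQoS)
				klog.Infof("NetworkQoS added: %s/%s", q.Namespace, q.Name)
			},
		})
		return err
	}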
"github.com/onsi/ginkgo/v2" @@ -223,6 +226,45 @@ func newIPAMClaim(name string) *ipamclaimsapi.IPAMClaim { } } +func newNetworkQoS(name, namespace string) *networkqos.NetworkQoS { + return &networkqos.NetworkQoS{ + ObjectMeta: newObjectMeta(name, namespace), + Spec: networkqos.Spec{ + NetworkSelectors: []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.NetworkAttachmentDefinitions, + NetworkAttachmentDefinitionSelector: &crdtypes.NetworkAttachmentDefinitionSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "stream", + }, + }, + }, + }, + }, + Priority: 100, + Egress: []networkqos.Rule{ + { + DSCP: 50, + Classifier: networkqos.Classifier{ + To: []networkqos.Destination{ + { + IPBlock: &knet.IPBlock{ + CIDR: "1.2.3.4/32", + }, + }, + }, + }, + Bandwidth: networkqos.Bandwidth{ + Rate: 20000, + Burst: 10, + }, + }, + }, + }, + } +} + func objSetup(c *fake.Clientset, objType string, listFn func(core.Action) (bool, runtime.Object, error)) *watch.FakeWatcher { w := watch.NewFake() c.AddWatchReactor(objType, core.DefaultWatchReactor(w, nil)) @@ -279,6 +321,13 @@ func ipamClaimsObjSetup(c *ipamclaimsapifake.Clientset, objType string, listFn f return w } +func networkQoSObjSetup(c *networkqosfake.Clientset, objType string, listFn func(core.Action) (bool, runtime.Object, error)) *watch.FakeWatcher { + w := watch.NewFake() + c.AddWatchReactor(objType, core.DefaultWatchReactor(w, nil)) + c.AddReactor("list", objType, listFn) + return w +} + type handlerCalls struct { added int32 updated int32 @@ -310,6 +359,7 @@ var _ = Describe("Watch Factory Operations", func() { adminNetworkPolicyFakeClient *anpapifake.Clientset ipamClaimsFakeClient *ipamclaimsapifake.Clientset nadsFakeClient *nadsfake.Clientset + networkQoSFakeClient *networkqosfake.Clientset podWatch, namespaceWatch, nodeWatch *watch.FakeWatcher policyWatch, serviceWatch *watch.FakeWatcher endpointSliceWatch *watch.FakeWatcher @@ -321,6 +371,7 @@ var _ = Describe("Watch Factory Operations", func() { adminNetPolWatch *watch.FakeWatcher baselineAdminNetPolWatch *watch.FakeWatcher ipamClaimsWatch *watch.FakeWatcher + networkQoSWatch *watch.FakeWatcher pods []*corev1.Pod namespaces []*corev1.Namespace nodes []*corev1.Node @@ -336,6 +387,7 @@ var _ = Describe("Watch Factory Operations", func() { adminNetworkPolicies []*anpapi.AdminNetworkPolicy baselineAdminNetworkPolicies []*anpapi.BaselineAdminNetworkPolicy ipamClaims []*ipamclaimsapi.IPAMClaim + networkQoSes []*networkqos.NetworkQoS err error shutdown bool ) @@ -355,6 +407,7 @@ var _ = Describe("Watch Factory Operations", func() { config.OVNKubernetesFeature.EnableAdminNetworkPolicy = true config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnablePersistentIPs = true + config.OVNKubernetesFeature.EnableNetworkQoS = true config.Kubernetes.PlatformType = string(ocpconfigapi.AWSPlatformType) fakeClient = &fake.Clientset{} @@ -366,6 +419,7 @@ var _ = Describe("Watch Factory Operations", func() { adminNetworkPolicyFakeClient = &anpapifake.Clientset{} ipamClaimsFakeClient = &ipamclaimsapifake.Clientset{} nadsFakeClient = &nadsfake.Clientset{} + networkQoSFakeClient = &networkqosfake.Clientset{} ovnClientset = &util.OVNMasterClientset{ KubeClient: fakeClient, @@ -377,6 +431,7 @@ var _ = Describe("Watch Factory Operations", func() { EgressServiceClient: egressServiceFakeClient, IPAMClaimsClient: ipamClaimsFakeClient, NetworkAttchDefClient: nadsFakeClient, + NetworkQoSClient: networkQoSFakeClient, } ovnCMClientset = 
&util.OVNClusterManagerClientset{ KubeClient: fakeClient, @@ -513,6 +568,16 @@ var _ = Describe("Watch Factory Operations", func() { } return true, obj, nil }) + + networkQoSes = make([]*networkqos.NetworkQoS, 0) + networkQoSWatch = networkQoSObjSetup(networkQoSFakeClient, "networkqoses", func(core.Action) (bool, runtime.Object, error) { + obj := &networkqos.NetworkQoSList{} + for _, p := range networkQoSes { + obj.Items = append(obj.Items, *p) + } + return true, obj, nil + }) + shutdown = false }) @@ -673,6 +738,10 @@ var _ = Describe("Watch Factory Operations", func() { ipamClaims = append(ipamClaims, newIPAMClaim("claim!")) testExisting(IPAMClaimsType, "", nil, defaultHandlerPriority) }) + It("is called for each existing networkQoS", func() { + networkQoSes = append(networkQoSes, newNetworkQoS("myNetworkQoS", "default")) + testExisting(NetworkQoSType, "", nil, defaultHandlerPriority) + }) It("is called for each existing pod that matches a given namespace and label", func() { pod := newPod("pod1", "default") @@ -787,6 +856,12 @@ var _ = Describe("Watch Factory Operations", func() { baselineAdminNetworkPolicies = append(baselineAdminNetworkPolicies, newBaselineAdminNetworkPolicy("myBANP2")) testExisting(BaselineAdminNetworkPolicyType) }) + It("calls ADD for each existing networkQoS", func() { + networkQoSes = append(networkQoSes, newNetworkQoS("myNetworkQoS", "default")) + networkQoSes = append(networkQoSes, newNetworkQoS("myNetworkQoS1", "default")) + testExisting(NetworkQoSType) + }) + It("doesn't deadlock when factory is shutdown", func() { // every queue has length 10, but some events may be handled before the stop channel event is selected, // so multiply by 15 instead of 10 to ensure overflow @@ -894,6 +969,20 @@ var _ = Describe("Watch Factory Operations", func() { }) }) + Context("when NetworkQoS is disabled", func() { + testExisting := func(objType reflect.Type) { + wf, err = NewMasterWatchFactory(ovnClientset) + Expect(err).NotTo(HaveOccurred()) + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(wf.informers).NotTo(HaveKey(objType)) + } + It("does not contain NetworkQoS informer", func() { + config.OVNKubernetesFeature.EnableNetworkQoS = false + testExisting(NetworkQoSType) + }) + }) + addFilteredHandler := func(wf *WatchFactory, objType reflect.Type, realObjType reflect.Type, namespace string, sel labels.Selector, funcs cache.ResourceEventHandlerFuncs) (*Handler, *handlerCalls) { calls := handlerCalls{} h, err := wf.addHandler(objType, namespace, sel, cache.ResourceEventHandlerFuncs{ @@ -2062,6 +2151,43 @@ var _ = Describe("Watch Factory Operations", func() { wf.RemoveIPAMClaimsHandler(h) }) + + It("responds to networkQoS add/update/delete events", func() { + wf, err = NewMasterWatchFactory(ovnClientset) + Expect(err).NotTo(HaveOccurred()) + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + + added := newNetworkQoS("myNetworkQoS", "default") + h, c := addHandler(wf, NetworkQoSType, cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + networkQoS := obj.(*networkqos.NetworkQoS) + Expect(reflect.DeepEqual(networkQoS, added)).To(BeTrue()) + }, + UpdateFunc: func(_, new interface{}) { + newNetworkQoS := new.(*networkqos.NetworkQoS) + Expect(reflect.DeepEqual(newNetworkQoS, added)).To(BeTrue()) + Expect(newNetworkQoS.Spec.Egress[0].DSCP).To(Equal(42)) + }, + DeleteFunc: func(obj interface{}) { + networkQoS := obj.(*networkqos.NetworkQoS) + Expect(reflect.DeepEqual(networkQoS, added)).To(BeTrue()) + }, + }) + + networkQoSes = append(networkQoSes, 
added) + networkQoSWatch.Add(added) + Eventually(c.getAdded, 2).Should(Equal(1)) + added.Spec.Egress[0].DSCP = 42 + networkQoSWatch.Modify(added) + Eventually(c.getUpdated, 2).Should(Equal(1)) + networkQoSes = networkQoSes[:0] + networkQoSWatch.Delete(added) + Eventually(c.getDeleted, 2).Should(Equal(1)) + + wf.RemoveNetworkQoSHandler(h) + }) + It("stops processing events after the handler is removed", func() { wf, err = NewMasterWatchFactory(ovnClientset) Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index 25c05470e4..1e87f7309b 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -26,6 +26,7 @@ import ( egressiplister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/listers/egressip/v1" egressqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/listers/egressqos/v1" egressservicelister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/listers/egressservice/v1" + networkqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1" userdefinednetworklister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/listers/userdefinednetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" ) @@ -504,6 +505,8 @@ func newInformerLister(oType reflect.Type, sharedInformer cache.SharedIndexInfor return userdefinednetworklister.NewUserDefinedNetworkLister(sharedInformer.GetIndexer()), nil case ClusterUserDefinedNetworkType: return userdefinednetworklister.NewClusterUserDefinedNetworkLister(sharedInformer.GetIndexer()), nil + case NetworkQoSType: + return networkqoslister.NewNetworkQoSLister(sharedInformer.GetIndexer()), nil } return nil, fmt.Errorf("cannot create lister from type %v", oType) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index 81cb5a2d30..4171e398e2 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -29,6 +29,7 @@ import ( egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" egressserviceclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" ) // InterfaceOVN represents the exported methods for dealing with getting/setting @@ -89,6 +90,7 @@ type KubeOVN struct { EgressQoSClient egressqosclientset.Interface IPAMClaimsClient ipamclaimssclientset.Interface NADClient nadclientset.Interface + NetworkQoSClient networkqosclientset.Interface } // SetAnnotationsOnPod takes the pod object and map of key/value string pairs to set as annotations diff --git a/go-controller/pkg/libovsdb/libovsdb.go b/go-controller/pkg/libovsdb/libovsdb.go index cd7a2bce72..40bd1298fe 100644 --- a/go-controller/pkg/libovsdb/libovsdb.go +++ b/go-controller/pkg/libovsdb/libovsdb.go @@ -140,10 +140,6 @@ func NewSBClientWithConfig(cfg config.OvnAuthConfig, promRegistry prometheus.Reg enableMetricsOption := client.WithMetricsRegistryNamespaceSubsystem(promRegistry, "ovnkube", "master_libovsdb") - dbModel.SetIndexes(map[string][]model.ClientIndex{ - sbdb.EncapTable: {{Columns: []model.ColumnKey{{Column: 
"chassis_name"}}}}, - }) - c, err := newClient(cfg, dbModel, stopCh, enableMetricsOption) if err != nil { return nil, err diff --git a/go-controller/pkg/libovsdb/ops/chassis.go b/go-controller/pkg/libovsdb/ops/chassis.go index c1d67b614a..0196da3463 100644 --- a/go-controller/pkg/libovsdb/ops/chassis.go +++ b/go-controller/pkg/libovsdb/ops/chassis.go @@ -138,27 +138,33 @@ func DeleteChassisWithPredicate(sbClient libovsdbclient.Client, p chassisPredica } // CreateOrUpdateChassis creates or updates the chassis record along with the encap record -func CreateOrUpdateChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis, encap *sbdb.Encap) error { +func CreateOrUpdateChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis, encaps ...*sbdb.Encap) error { m := newModelClient(sbClient) - opModels := []operationModel{ - { + opModels := make([]operationModel, 0, len(encaps)+1) + for i := range encaps { + encap := encaps[i] + opModel := operationModel{ Model: encap, DoAfter: func() { - chassis.Encaps = []string{encap.UUID} + encapsList := append(chassis.Encaps, encap.UUID) + chassis.Encaps = sets.New(encapsList...).UnsortedList() }, - OnModelUpdates: onModelUpdatesAllNonDefault(), + OnModelUpdates: onModelUpdatesNone(), ErrNotFound: false, BulkOp: false, - }, - { - Model: chassis, - OnModelMutations: []interface{}{&chassis.OtherConfig}, - OnModelUpdates: []interface{}{&chassis.Encaps}, - ErrNotFound: false, - BulkOp: false, - }, + } + opModels = append(opModels, opModel) + } + + opModel := operationModel{ + Model: chassis, + OnModelMutations: []interface{}{&chassis.OtherConfig}, + OnModelUpdates: []interface{}{&chassis.Encaps}, + ErrNotFound: false, + BulkOp: false, } + opModels = append(opModels, opModel) if _, err := m.CreateOrUpdate(opModels...); err != nil { return err } diff --git a/go-controller/pkg/libovsdb/ops/chassis_test.go b/go-controller/pkg/libovsdb/ops/chassis_test.go index 1d8f338081..7c60cc4217 100644 --- a/go-controller/pkg/libovsdb/ops/chassis_test.go +++ b/go-controller/pkg/libovsdb/ops/chassis_test.go @@ -181,3 +181,97 @@ func TestDeleteChassis(t *testing.T) { }) } } + +func TestCreateOrUpdateChassis(t *testing.T) { + uuid1 := "b9998337-2498-4d1e-86e6-fc0417abb2f0" + uuid2 := "b9998337-2498-4d1e-86e6-fc0417abb2f1" + uuid3 := "b9998337-2498-4d1e-86e6-fc0417abb2f2" + tests := []struct { + desc string + chassis *sbdb.Chassis + encaps []*sbdb.Encap + initialDB []libovsdbtest.TestData + expectedDB []libovsdbtest.TestData + }{ + { + desc: "create new chassis with encap records", + chassis: &sbdb.Chassis{Name: "test1"}, + encaps: []*sbdb.Encap{{ChassisName: "test1", IP: "10.0.0.10", Type: "geneve"}, + {ChassisName: "test1", IP: "10.0.0.11", Type: "geneve"}}, + initialDB: []libovsdbtest.TestData{}, + expectedDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test1", Encaps: []string{uuid2, uuid3}}, + &sbdb.Encap{UUID: uuid2, ChassisName: "test1", IP: "10.0.0.10", Type: "geneve"}, + &sbdb.Encap{UUID: uuid3, ChassisName: "test1", IP: "10.0.0.11", Type: "geneve"}, + }, + }, + { + desc: "update chassis by inserting new encap record", + chassis: &sbdb.Chassis{Name: "test2"}, + encaps: []*sbdb.Encap{{ChassisName: "test2", IP: "10.0.0.10", Type: "geneve"}, + {ChassisName: "test2", IP: "10.0.0.11", Type: "geneve"}}, + initialDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test2", Encaps: []string{uuid2}}, + &sbdb.Encap{UUID: uuid2, ChassisName: "test2", IP: "10.0.0.10", Type: "geneve"}, + }, + expectedDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: 
uuid1, Name: "test2", Encaps: []string{uuid2, uuid3}}, + &sbdb.Encap{UUID: uuid2, ChassisName: "test2", IP: "10.0.0.10", Type: "geneve"}, + &sbdb.Encap{UUID: uuid3, ChassisName: "test2", IP: "10.0.0.11", Type: "geneve"}, + }, + }, + { + desc: "update chassis by removing obsolete encap record", + chassis: &sbdb.Chassis{Name: "test3"}, + encaps: []*sbdb.Encap{{ChassisName: "test3", IP: "10.0.0.11", Type: "geneve"}}, + initialDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test3", Encaps: []string{uuid2, uuid3}}, + &sbdb.Encap{UUID: uuid2, ChassisName: "test3", IP: "10.0.0.10", Type: "geneve"}, + &sbdb.Encap{UUID: uuid3, ChassisName: "test3", IP: "10.0.0.11", Type: "geneve"}, + }, + expectedDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test3", Encaps: []string{uuid3}}, + &sbdb.Encap{UUID: uuid3, ChassisName: "test3", IP: "10.0.0.11", Type: "geneve"}, + }, + }, + { + desc: "update chassis by adding new encap record and deleting the old one", + chassis: &sbdb.Chassis{Name: "test4"}, + encaps: []*sbdb.Encap{{ChassisName: "test4", IP: "10.0.0.11", Type: "geneve"}}, + initialDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test4", Encaps: []string{uuid2}}, + &sbdb.Encap{UUID: uuid2, ChassisName: "test4", IP: "10.0.0.10", Type: "geneve"}, + }, + expectedDB: []libovsdbtest.TestData{ + &sbdb.Chassis{UUID: uuid1, Name: "test4", Encaps: []string{uuid3}}, + &sbdb.Encap{UUID: uuid3, ChassisName: "test4", IP: "10.0.0.11", Type: "geneve"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + dbSetup := libovsdbtest.TestSetup{ + SBData: tt.initialDB, + } + sbClient, cleanup, err := libovsdbtest.NewSBTestHarness(dbSetup, nil) + if err != nil { + t.Fatalf("%s: failed to set up test harness: %v", tt.desc, err) + } + t.Cleanup(cleanup.Cleanup) + + err = CreateOrUpdateChassis(sbClient, tt.chassis, tt.encaps...) + if err != nil { + t.Fatal(fmt.Errorf("%s: got unexpected error: %v", tt.desc, err)) + } + + matcher := libovsdbtest.HaveDataIgnoringUUIDs(tt.expectedDB) + match, err := matcher.Match(sbClient) + if err != nil { + t.Fatalf("%s: matcher error: %v", tt.desc, err) + } + if !match { + t.Fatalf("%s: DB state did not match: %s", tt.desc, matcher.FailureMessage(sbClient)) + } + }) + } +} diff --git a/go-controller/pkg/libovsdb/ops/db_object_types.go b/go-controller/pkg/libovsdb/ops/db_object_types.go index bb2afeea11..45c2777637 100644 --- a/go-controller/pkg/libovsdb/ops/db_object_types.go +++ b/go-controller/pkg/libovsdb/ops/db_object_types.go @@ -19,6 +19,7 @@ const ( EgressQoSOwnerType ownerType = "EgressQoS" AdminNetworkPolicyOwnerType ownerType = "AdminNetworkPolicy" BaselineAdminNetworkPolicyOwnerType ownerType = "BaselineAdminNetworkPolicy" + NetworkQoSOwnerType ownerType = "NetworkQoS" // NetworkPolicyOwnerType is deprecated for address sets, should only be used for sync. // New owner of network policy address sets, is PodSelectorOwnerType. 
NetworkPolicyOwnerType ownerType = "NetworkPolicy" @@ -35,6 +36,7 @@ const ( NetpolNamespaceOwnerType ownerType = "NetpolNamespace" VirtualMachineOwnerType ownerType = "VirtualMachine" UDNEnabledServiceOwnerType ownerType = "UDNEnabledService" + AdvertisedNetworkOwnerType ownerType = "AdvertisedNetwork" // NetworkPolicyPortIndexOwnerType is the old version of NetworkPolicyOwnerType, kept for sync only NetworkPolicyPortIndexOwnerType ownerType = "NetworkPolicyPortIndexOwnerType" // ClusterOwnerType means the object is cluster-scoped and doesn't belong to any k8s objects @@ -141,6 +143,28 @@ var AddressSetUDNEnabledService = newObjectIDsType(addressSet, UDNEnabledService IPFamilyKey, }) +var AddressSetNetworkQoS = newObjectIDsType(addressSet, NetworkQoSOwnerType, []ExternalIDKey{ + // nqos namespace:name + ObjectNameKey, + // rule index + RuleIndex, + IpBlockIndexKey, + IPFamilyKey, +}) + +var AddressSetAdvertisedNetwork = newObjectIDsType(addressSet, AdvertisedNetworkOwnerType, []ExternalIDKey{ + // cluster-wide address set name + ObjectNameKey, + IPFamilyKey, +}) + +var ACLAdvertisedNetwork = newObjectIDsType(acl, AdvertisedNetworkOwnerType, []ExternalIDKey{ + // ACL name + ObjectNameKey, + // NetworkID + NetworkKey, +}) + var ACLAdminNetworkPolicy = newObjectIDsType(acl, AdminNetworkPolicyOwnerType, []ExternalIDKey{ // anp name ObjectNameKey, @@ -344,3 +368,9 @@ var QoSRuleEgressIP = newObjectIDsType(qos, EgressIPOwnerType, []ExternalIDKey{ // the IP Family for this policy, ip4 or ip6 or ip(dualstack) IPFamilyKey, }) + +var NetworkQoS = newObjectIDsType(qos, NetworkQoSOwnerType, []ExternalIDKey{ + ObjectNameKey, + // rule index + RuleIndex, +}) diff --git a/go-controller/pkg/libovsdb/ops/qos.go b/go-controller/pkg/libovsdb/ops/qos.go index d78be6b1e2..21d6a2f7f8 100644 --- a/go-controller/pkg/libovsdb/ops/qos.go +++ b/go-controller/pkg/libovsdb/ops/qos.go @@ -10,6 +10,11 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" ) +func getQoSMutableFields(qos *nbdb.QoS) []interface{} { + return []interface{}{&qos.Action, &qos.Bandwidth, &qos.Direction, &qos.ExternalIDs, + &qos.Match, &qos.Priority} +} + type QoSPredicate func(*nbdb.QoS) bool // FindQoSesWithPredicate looks up QoSes from the cache based on a @@ -30,7 +35,7 @@ func CreateOrUpdateQoSesOps(nbClient libovsdbclient.Client, ops []ovsdb.Operatio qos := qoses[i] opModel := operationModel{ Model: qos, - OnModelUpdates: []interface{}{}, // update all fields + OnModelUpdates: getQoSMutableFields(qos), ErrNotFound: false, BulkOp: false, } @@ -48,7 +53,7 @@ func UpdateQoSesOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, qoses qos := qoses[i] opModel := operationModel{ Model: qos, - OnModelUpdates: []interface{}{}, // update all fields + OnModelUpdates: getQoSMutableFields(qos), ErrNotFound: true, BulkOp: false, } @@ -111,10 +116,35 @@ func RemoveQoSesFromLogicalSwitchOps(nbClient libovsdbclient.Client, ops []ovsdb opModels := operationModel{ Model: sw, OnModelMutations: []interface{}{&sw.QOSRules}, - ErrNotFound: true, + ErrNotFound: false, BulkOp: false, } modelClient := newModelClient(nbClient) return modelClient.DeleteOps(ops, opModels) } + +// DeleteQoSesWithPredicateOps returns the ops to delete QoSes based on a given predicate +func DeleteQoSesWithPredicateOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, p QoSPredicate) ([]ovsdb.Operation, error) { + deleted := []*nbdb.QoS{} + opModel := operationModel{ + ModelPredicate: p, + ExistingResult: &deleted, + ErrNotFound: false, + BulkOp: true, + } + + 
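	// Editor's sketch (not part of the patch): with the helpers above, a controller could
	// delete all QoS rows it owns for a single NetworkQoS by matching on the external IDs
	// derived from the NetworkQoS DbObjectIDs type declared in db_object_types.go. The
	// controller name and the use of NewDbObjectIDs/GetPredicate follow the pattern used
	// for other owner types and are assumptions here:
	//
	//	ids := libovsdbops.NewDbObjectIDs(libovsdbops.NetworkQoS, "default-network-controller",
	//		map[libovsdbops.ExternalIDKey]string{libovsdbops.ObjectNameKey: "default:mark-stream"})
	//	err := libovsdbops.DeleteQoSesWithPredicate(nbClient, libovsdbops.GetPredicate[*nbdb.QoS](ids, nil))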
m := newModelClient(nbClient)
+	return m.DeleteOps(ops, opModel)
+}
+
+// DeleteQoSesWithPredicate looks up QoSes from the cache based on
+// a given predicate and deletes them
+func DeleteQoSesWithPredicate(nbClient libovsdbclient.Client, p QoSPredicate) error {
+	ops, err := DeleteQoSesWithPredicateOps(nbClient, nil, p)
+	if err != nil {
+		return err
+	}
+	_, err = TransactAndCheck(nbClient, ops)
+	return err
+}
diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go
index 92af8c2717..be769bc87a 100644
--- a/go-controller/pkg/node/controllers/egressip/egressip.go
+++ b/go-controller/pkg/node/controllers/egressip/egressip.go
@@ -1332,7 +1332,7 @@ func routeDifference(routesA, routesB []netlink.Route) []netlink.Route {
 	for _, routeA := range routesA {
 		found = false
 		for _, routeB := range routesB {
-			if routemanager.RoutePartiallyEqual(routeA, routeB) {
+			if util.RouteEqual(&routeA, &routeB) {
 				found = true
 				break
 			}
diff --git a/go-controller/pkg/node/controllers/egressip/egressip_test.go b/go-controller/pkg/node/controllers/egressip/egressip_test.go
index 92f2fff8dd..b8a0a6d6a1 100644
--- a/go-controller/pkg/node/controllers/egressip/egressip_test.go
+++ b/go-controller/pkg/node/controllers/egressip/egressip_test.go
@@ -1699,10 +1699,21 @@ func getNetlinkAddr(ip, netmask string) *netlink.Addr {
 // containsRoutes returns true if routes in routes1 are presents in routes routes2
 func containsRoutes(routes1 []netlink.Route, routes2 []netlink.Route) bool {
 	var found bool
+	eq := func(route1, route2 netlink.Route) bool {
+		// normalize fields that we don't set explicitly and just get set once
+		// the route is installed
+		if route1.Family == netlink.FAMILY_ALL {
+			route1.Family = route2.Family
+		}
+		if route1.Protocol == unix.RTPROT_UNSPEC {
+			route1.Protocol = route2.Protocol
+		}
+		return util.RouteEqual(&route1, &route2)
+	}
 	for _, route1 := range routes1 {
 		found = false
 		for _, route2 := range routes2 {
-			if routemanager.RoutePartiallyEqual(route1, route2) {
+			if eq(route1, route2) {
 				found = true
 				break
 			}
diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index 8afed02171..02a110b5d7 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -20,11 +20,13 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/selection"
+	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	utilnet "k8s.io/utils/net"
+	"sigs.k8s.io/knftables"
 
 	"github.com/ovn-org/libovsdb/client"
 
@@ -40,6 +42,7 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport"
+	nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute"
@@ -117,6 +120,9 @@ type DefaultNodeNetworkController struct {
 	// retry framework for endpoint slices, used for the removal of stale conntrack entries for services
 	retryEndpointSlices
*retry.RetryFramework + // retry framework for nodes, used for updating routes/nftables rules for node PMTUD guarding + retryNodes *retry.RetryFramework + apbExternalRouteNodeController *apbroute.ExternalGatewayNodeController networkManager networkmanager.Interface @@ -181,12 +187,23 @@ func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, net nc.initRetryFrameworkForNode() + err = setupPMTUDNFTSets() + if err != nil { + return nil, fmt.Errorf("failed to setup PMTUD nftables sets: %w", err) + } + + err = setupPMTUDNFTChain() + if err != nil { + return nil, fmt.Errorf("failed to setup PMTUD nftables chain: %w", err) + } + return nc, nil } func (nc *DefaultNodeNetworkController) initRetryFrameworkForNode() { nc.retryNamespaces = nc.newRetryFrameworkNode(factory.NamespaceExGwType) nc.retryEndpointSlices = nc.newRetryFrameworkNode(factory.EndpointSliceForStaleConntrackRemovalType) + nc.retryNodes = nc.newRetryFrameworkNode(factory.NodeType) } func (oc *DefaultNodeNetworkController) shouldReconcileNetworkChange(old, new util.NetInfo) bool { @@ -925,6 +942,13 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { if err := util.SetNodeZone(nodeAnnotator, sbZone); err != nil { return fmt.Errorf("failed to set node zone annotation for node %s: %w", nc.name, err) } + + encapIPList := sets.New[string]() + encapIPList.Insert(strings.Split(config.Default.EffectiveEncapIP, ",")...) + if err := util.SetNodeEncapIPs(nodeAnnotator, encapIPList); err != nil { + return fmt.Errorf("failed to set node-encap-ips annotation for node %s: %w", nc.name, err) + } + if err := nodeAnnotator.Run(); err != nil { return fmt.Errorf("failed to set node %s annotations: %w", nc.name, err) } @@ -1238,6 +1262,10 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { if err != nil { return fmt.Errorf("failed to watch endpointSlices: %w", err) } + err = nc.WatchNodes() + if err != nil { + return fmt.Errorf("failed to watch nodes: %w", err) + } } if nc.healthzServer != nil { @@ -1445,6 +1473,144 @@ func (nc *DefaultNodeNetworkController) WatchNamespaces() error { return err } +func (nc *DefaultNodeNetworkController) WatchNodes() error { + _, err := nc.retryNodes.WatchResource() + return err +} + +// addOrUpdateNode handles creating flows or nftables rules for each node to handle PMTUD +func (nc *DefaultNodeNetworkController) addOrUpdateNode(node *corev1.Node) error { + var nftElems []*knftables.Element + var addrs []string + for _, address := range node.Status.Addresses { + if address.Type != corev1.NodeInternalIP { + continue + } + nodeIP := net.ParseIP(address.Address) + if nodeIP == nil { + continue + } + + addrs = append(addrs, nodeIP.String()) + klog.Infof("Adding remote node %q, IP: %s to PMTUD blocking rules", node.Name, nodeIP) + if utilnet.IsIPv4(nodeIP) { + nftElems = append(nftElems, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv4, + Key: []string{nodeIP.String()}, + }) + } else { + nftElems = append(nftElems, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv6, + Key: []string{nodeIP.String()}, + }) + } + } + + gw := nc.Gateway.(*gateway) + gw.openflowManager.updateBridgePMTUDFlowCache(getPMTUDKey(node.Name), addrs) + + if len(nftElems) > 0 { + if err := nodenft.UpdateNFTElements(nftElems); err != nil { + return fmt.Errorf("unable to update NFT elements for node %q, error: %w", node.Name, err) + } + } + + return nil +} + +func removePMTUDNodeNFTRules(nodeIPs []net.IP) error { + var nftElems []*knftables.Element + for _, nodeIP := 
range nodeIPs { + // Remove IPs from NFT sets + if utilnet.IsIPv4(nodeIP) { + nftElems = append(nftElems, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv4, + Key: []string{nodeIP.String()}, + }) + } else { + nftElems = append(nftElems, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv6, + Key: []string{nodeIP.String()}, + }) + } + } + if len(nftElems) > 0 { + if err := nodenft.DeleteNFTElements(nftElems); err != nil { + return err + } + } + return nil +} + +func (nc *DefaultNodeNetworkController) deleteNode(node *corev1.Node) { + gw := nc.Gateway.(*gateway) + gw.openflowManager.deleteFlowsByKey(getPMTUDKey(node.Name)) + ipsToRemove := make([]net.IP, 0) + for _, address := range node.Status.Addresses { + if address.Type != corev1.NodeInternalIP { + continue + } + nodeIP := net.ParseIP(address.Address) + if nodeIP == nil { + continue + } + ipsToRemove = append(ipsToRemove, nodeIP) + } + + klog.Infof("Deleting NFT elements for node: %s", node.Name) + if err := removePMTUDNodeNFTRules(ipsToRemove); err != nil { + klog.Errorf("Failed to delete nftables rules for PMTUD blocking for node %q: %v", node.Name, err) + } +} + +func (nc *DefaultNodeNetworkController) syncNodes(objs []interface{}) error { + var keepNFTSetElemsV4, keepNFTSetElemsV6 []*knftables.Element + var errors []error + klog.Infof("Starting node controller node sync") + start := time.Now() + for _, obj := range objs { + node, ok := obj.(*corev1.Node) + if !ok { + klog.Errorf("Spurious object in syncNodes: %v", obj) + continue + } + if node.Name == nc.name { + continue + } + for _, address := range node.Status.Addresses { + if address.Type != corev1.NodeInternalIP { + continue + } + nodeIP := net.ParseIP(address.Address) + if nodeIP == nil { + continue + } + + // Remove IPs from NFT sets + if utilnet.IsIPv4(nodeIP) { + keepNFTSetElemsV4 = append(keepNFTSetElemsV4, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv4, + Key: []string{nodeIP.String()}, + }) + } else { + keepNFTSetElemsV6 = append(keepNFTSetElemsV6, &knftables.Element{ + Set: types.NFTNoPMTUDRemoteNodeIPsv6, + Key: []string{nodeIP.String()}, + }) + } + } + } + if err := recreateNFTSet(types.NFTNoPMTUDRemoteNodeIPsv4, keepNFTSetElemsV4); err != nil { + errors = append(errors, err) + } + if err := recreateNFTSet(types.NFTNoPMTUDRemoteNodeIPsv6, keepNFTSetElemsV6); err != nil { + errors = append(errors, err) + } + + klog.Infof("Node controller node sync done. Time taken: %s", time.Since(start)) + return utilerrors.Join(errors...) +} + // validateVTEPInterfaceMTU checks if the MTU of the interface that has ovn-encap-ip is big // enough to carry the `config.Default.MTU` and the Geneve header. 
If the MTU is not big // enough, it will return an error @@ -1485,6 +1651,10 @@ func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { return nil } +func getPMTUDKey(nodeName string) string { + return fmt.Sprintf("%s_pmtud", nodeName) +} + func configureSvcRouteViaBridge(routeManager *routemanager.Controller, bridge string) error { return configureSvcRouteViaInterface(routeManager, bridge, DummyNextHopIPs()) } diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index afa8807aef..368b333800 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -4,15 +4,25 @@ import ( "context" "fmt" "net" + "sync" + "time" + "github.com/containernetworking/plugins/pkg/ns" + "github.com/containernetworking/plugins/pkg/testutils" + nadfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" "github.com/urfave/cli/v2" "github.com/vishvananda/netlink" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" + nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" netlink_mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -23,6 +33,22 @@ import ( . 
"github.com/onsi/gomega" ) +const v4PMTUDNFTRules = ` +add table inet ovn-kubernetes +add rule inet ovn-kubernetes no-pmtud ip daddr @no-pmtud-remote-node-ips-v4 meta l4proto icmp icmp type 3 icmp code 4 counter drop +add chain inet ovn-kubernetes no-pmtud { type filter hook output priority 0 ; comment "Block egress needs frag/packet too big to remote k8s nodes" ; } +add set inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } +` + +const v6PMTUDNFTRules = ` +add table inet ovn-kubernetes +add rule inet ovn-kubernetes no-pmtud meta l4proto icmpv6 icmpv6 type 2 icmpv6 code 0 ip6 daddr @no-pmtud-remote-node-ips-v6 counter drop +add chain inet ovn-kubernetes no-pmtud { type filter hook output priority 0 ; comment "Block egress needs frag/packet too big to remote k8s nodes" ; } +add set inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { type ipv4_addr ; comment "Block egress ICMP needs frag to remote Kubernetes nodes" ; } +add set inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { type ipv6_addr ; comment "Block egress ICMPv6 packet too big to remote Kubernetes nodes" ; } +` + var _ = Describe("Node", func() { Describe("validateMTU", func() { @@ -652,4 +678,564 @@ var _ = Describe("Node", func() { Expect(err).NotTo(HaveOccurred()) }) }) + Describe("node pmtud management", func() { + var ( + testNS ns.NetNS + nc *DefaultNodeNetworkController + app *cli.App + ) + + const ( + nodeName = "my-node" + remoteNodeName = "other-node" + ) + + BeforeEach(func() { + var err error + testNS, err = testutils.NewNS() + Expect(err).NotTo(HaveOccurred()) + Expect(config.PrepareTestConfig()).To(Succeed()) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + }) + + AfterEach(func() { + util.ResetNetLinkOpMockInst() // other tests in this package rely directly on netlink (e.g. 
gateway_init_linux_test.go) + Expect(testNS.Close()).To(Succeed()) + }) + + Context("with a cluster in IPv4 mode", func() { + const ( + ethName string = "lo1337" + nodeIP string = "169.254.254.60" + ethCIDR string = nodeIP + "/24" + otherNodeIP string = "169.254.254.61" + otherSubnetNodeIP string = "169.254.253.61" + fullMask = 32 + ) + var link netlink.Link + + BeforeEach(func() { + config.IPv4Mode = true + config.IPv6Mode = false + config.Gateway.Mode = config.GatewayModeShared + + // Note we must do this in default netNS because + // nc.WatchNodes() will spawn goroutines which we cannot lock to the testNS + ovntest.AddLink(ethName) + + var err error + link, err = netlink.LinkByName(ethName) + Expect(err).NotTo(HaveOccurred()) + err = netlink.LinkSetUp(link) + Expect(err).NotTo(HaveOccurred()) + + // Add an IP address + addr, err := netlink.ParseAddr(ethCIDR) + Expect(err).NotTo(HaveOccurred()) + addr.Scope = int(netlink.SCOPE_UNIVERSE) + err = netlink.AddrAdd(link, addr) + Expect(err).NotTo(HaveOccurred()) + + }) + + AfterEach(func() { + err := netlink.LinkDel(link) + Expect(err).NotTo(HaveOccurred()) + }) + + ovntest.OnSupportedPlatformsIt("adds and removes nftables rule for node in same subnet", func() { + + app.Action = func(_ *cli.Context) error { + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: nodeIP, + }, + }, + }, + } + + otherNode := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: remoteNodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: otherNodeIP, + }, + }, + }, + } + nft := nodenft.SetFakeNFTablesHelper() + + kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{ + Items: []corev1.Node{node, otherNode}, + }) + fakeClient := &util.OVNNodeClientset{ + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), + NetworkAttchDefClient: nadfake.NewSimpleClientset(), + } + + stop := make(chan struct{}) + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + wg := &sync.WaitGroup{} + defer func() { + close(stop) + wg.Wait() + wf.Shutdown() + }() + + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + routeManager := routemanager.NewController() + cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) + nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) + nc.initRetryFrameworkForNode() + err = setupPMTUDNFTSets() + Expect(err).NotTo(HaveOccurred()) + err = setupPMTUDNFTChain() + Expect(err).NotTo(HaveOccurred()) + defaultNetConfig := &bridgeUDNConfiguration{ + ofPortPatch: "patch-breth0_ov", + } + nc.Gateway = &gateway{ + openflowManager: &openflowManager{ + flowCache: map[string][]string{}, + defaultBridge: &bridgeConfiguration{ + netConfig: map[string]*bridgeUDNConfiguration{ + types.DefaultNetworkName: defaultNetConfig, + }, + }, + }, + } + + // must run route manager manually which is usually started with nc.Start() + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + nc.routeManager.Run(stop, 10*time.Second) + Expect(err).NotTo(HaveOccurred()) + }() + By("start up should add nftables rules for remote node") + + err = nc.WatchNodes() + Expect(err).NotTo(HaveOccurred()) + nftRules := v4PMTUDNFTRules + ` +add element inet ovn-kubernetes 
no-pmtud-remote-node-ips-v4 { 169.254.254.61 } +` + err = nodenft.MatchNFTRules(nftRules, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + gw := nc.Gateway.(*gateway) + By("start up should add openflow rules for remote node") + flows := gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) + Expect(flows).To(HaveLen(1)) + + By("deleting the remote node should remove the nftables element") + err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), remoteNodeName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return nodenft.MatchNFTRules(v4PMTUDNFTRules, nft.Dump()) + }).WithTimeout(2 * time.Second).ShouldNot(HaveOccurred()) + Eventually(func() []string { return gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) }).WithTimeout(2 * time.Second).Should(BeEmpty()) + return nil + + } + + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + + ovntest.OnSupportedPlatformsIt("adds and removes nftables rule for node in different subnet", func() { + + app.Action = func(_ *cli.Context) error { + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: nodeIP, + }, + }, + }, + } + + otherNode := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: remoteNodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: otherSubnetNodeIP, + }, + }, + }, + } + nft := nodenft.SetFakeNFTablesHelper() + + kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{ + Items: []corev1.Node{node, otherNode}, + }) + fakeClient := &util.OVNNodeClientset{ + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), + NetworkAttchDefClient: nadfake.NewSimpleClientset(), + } + + stop := make(chan struct{}) + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + wg := &sync.WaitGroup{} + defer func() { + close(stop) + wg.Wait() + wf.Shutdown() + }() + + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + routeManager := routemanager.NewController() + cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) + nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) + nc.initRetryFrameworkForNode() + err = setupPMTUDNFTSets() + Expect(err).NotTo(HaveOccurred()) + err = setupPMTUDNFTChain() + Expect(err).NotTo(HaveOccurred()) + defaultNetConfig := &bridgeUDNConfiguration{ + ofPortPatch: "patch-breth0_ov", + } + nc.Gateway = &gateway{ + openflowManager: &openflowManager{ + flowCache: map[string][]string{}, + defaultBridge: &bridgeConfiguration{ + netConfig: map[string]*bridgeUDNConfiguration{ + types.DefaultNetworkName: defaultNetConfig, + }, + }, + }, + } + + // must run route manager manually which is usually started with nc.Start() + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + nc.routeManager.Run(stop, 10*time.Second) + Expect(err).NotTo(HaveOccurred()) + }() + By("start up should add nftables rules for remote node") + + err = nc.WatchNodes() + Expect(err).NotTo(HaveOccurred()) + nftRules := v4PMTUDNFTRules + ` +add element inet ovn-kubernetes no-pmtud-remote-node-ips-v4 { 169.254.253.61 } +` + err = nodenft.MatchNFTRules(nftRules, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + gw := nc.Gateway.(*gateway) + By("start up should add 
openflow rules for remote node") + flows := gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) + Expect(flows).To(HaveLen(1)) + + By("deleting the remote node should remove the nftables element") + err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), remoteNodeName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return nodenft.MatchNFTRules(v4PMTUDNFTRules, nft.Dump()) + }).WithTimeout(2 * time.Second).ShouldNot(HaveOccurred()) + Eventually(func() []string { return gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) }).WithTimeout(2 * time.Second).Should(BeEmpty()) + return nil + + } + + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Context("with a cluster in IPv6 mode", func() { + const ( + ethName string = "lo1337" + nodeIP string = "2001:db8:1::3" + ethCIDR string = nodeIP + "/64" + otherNodeIP string = "2001:db8:1::4" + otherSubnetNodeIP string = "2002:db8:1::4" + fullMask = 128 + ) + + var link netlink.Link + + BeforeEach(func() { + config.IPv4Mode = false + config.IPv6Mode = true + config.Gateway.Mode = config.GatewayModeShared + + // Note we must do this in default netNS because + // nc.WatchNodes() will spawn goroutines which we cannot lock to the testNS + ovntest.AddLink(ethName) + + var err error + link, err = netlink.LinkByName(ethName) + Expect(err).NotTo(HaveOccurred()) + err = netlink.LinkSetUp(link) + Expect(err).NotTo(HaveOccurred()) + + // Add an IP address + addr, err := netlink.ParseAddr(ethCIDR) + Expect(err).NotTo(HaveOccurred()) + addr.Scope = int(netlink.SCOPE_UNIVERSE) + err = netlink.AddrAdd(link, addr) + Expect(err).NotTo(HaveOccurred()) + + }) + + AfterEach(func() { + err := netlink.LinkDel(link) + Expect(err).NotTo(HaveOccurred()) + }) + + ovntest.OnSupportedPlatformsIt("adds and removes nftables rule for node in same subnet", func() { + + app.Action = func(_ *cli.Context) error { + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: nodeIP, + }, + }, + }, + } + + otherNode := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: remoteNodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: otherNodeIP, + }, + }, + }, + } + nft := nodenft.SetFakeNFTablesHelper() + + kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{ + Items: []corev1.Node{node, otherNode}, + }) + fakeClient := &util.OVNNodeClientset{ + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), + NetworkAttchDefClient: nadfake.NewSimpleClientset(), + } + + stop := make(chan struct{}) + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + wg := &sync.WaitGroup{} + defer func() { + close(stop) + wg.Wait() + wf.Shutdown() + }() + + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + routeManager := routemanager.NewController() + cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) + nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) + nc.initRetryFrameworkForNode() + err = setupPMTUDNFTSets() + Expect(err).NotTo(HaveOccurred()) + err = setupPMTUDNFTChain() + Expect(err).NotTo(HaveOccurred()) + defaultNetConfig := &bridgeUDNConfiguration{ + ofPortPatch: "patch-breth0_ov", + } + nc.Gateway = 
&gateway{ + openflowManager: &openflowManager{ + flowCache: map[string][]string{}, + defaultBridge: &bridgeConfiguration{ + netConfig: map[string]*bridgeUDNConfiguration{ + types.DefaultNetworkName: defaultNetConfig, + }, + }, + }, + } + + // must run route manager manually which is usually started with nc.Start() + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + nc.routeManager.Run(stop, 10*time.Second) + Expect(err).NotTo(HaveOccurred()) + }() + By("start up should add nftables rules for remote node") + + err = nc.WatchNodes() + Expect(err).NotTo(HaveOccurred()) + nftRules := v6PMTUDNFTRules + ` +add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2001:db8:1::4 } +` + err = nodenft.MatchNFTRules(nftRules, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + gw := nc.Gateway.(*gateway) + By("start up should add openflow rules for remote node") + flows := gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) + Expect(flows).To(HaveLen(1)) + + By("deleting the remote node should remove the nftables element") + err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), remoteNodeName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return nodenft.MatchNFTRules(v6PMTUDNFTRules, nft.Dump()) + }).WithTimeout(2 * time.Second).ShouldNot(HaveOccurred()) + Eventually(func() []string { return gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) }).WithTimeout(2 * time.Second).Should(BeEmpty()) + return nil + } + + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + + ovntest.OnSupportedPlatformsIt("adds and removes nftables rule for node in different subnet", func() { + + app.Action = func(_ *cli.Context) error { + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: nodeIP, + }, + }, + }, + } + + otherNode := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: remoteNodeName, + }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + { + Type: corev1.NodeInternalIP, + Address: otherSubnetNodeIP, + }, + }, + }, + } + nft := nodenft.SetFakeNFTablesHelper() + + kubeFakeClient := fake.NewSimpleClientset(&corev1.NodeList{ + Items: []corev1.Node{node, otherNode}, + }) + fakeClient := &util.OVNNodeClientset{ + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), + NetworkAttchDefClient: nadfake.NewSimpleClientset(), + } + + stop := make(chan struct{}) + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + wg := &sync.WaitGroup{} + defer func() { + close(stop) + wg.Wait() + wf.Shutdown() + }() + + err = wf.Start() + Expect(err).NotTo(HaveOccurred()) + routeManager := routemanager.NewController() + cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName, routeManager) + nc = newDefaultNodeNetworkController(cnnci, stop, wg, routeManager, nil) + nc.initRetryFrameworkForNode() + err = setupPMTUDNFTSets() + Expect(err).NotTo(HaveOccurred()) + err = setupPMTUDNFTChain() + Expect(err).NotTo(HaveOccurred()) + defaultNetConfig := &bridgeUDNConfiguration{ + ofPortPatch: "patch-breth0_ov", + } + nc.Gateway = &gateway{ + openflowManager: &openflowManager{ + flowCache: map[string][]string{}, + defaultBridge: &bridgeConfiguration{ + netConfig: map[string]*bridgeUDNConfiguration{ + types.DefaultNetworkName: 
defaultNetConfig, + }, + }, + }, + } + + // must run route manager manually which is usually started with nc.Start() + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + nc.routeManager.Run(stop, 10*time.Second) + Expect(err).NotTo(HaveOccurred()) + }() + By("start up should add nftables rules for remote node") + + err = nc.WatchNodes() + Expect(err).NotTo(HaveOccurred()) + nftRules := v6PMTUDNFTRules + ` +add element inet ovn-kubernetes no-pmtud-remote-node-ips-v6 { 2002:db8:1::4 } +` + err = nodenft.MatchNFTRules(nftRules, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + gw := nc.Gateway.(*gateway) + By("start up should add openflow rules for remote node") + flows := gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) + Expect(flows).To(HaveLen(1)) + + By("deleting the remote node should remove the nftables element") + err = kubeFakeClient.CoreV1().Nodes().Delete(context.TODO(), remoteNodeName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return nodenft.MatchNFTRules(v6PMTUDNFTRules, nft.Dump()) + }).WithTimeout(2 * time.Second).ShouldNot(HaveOccurred()) + Eventually(func() []string { return gw.openflowManager.getFlowsByKey(getPMTUDKey(remoteNodeName)) }).WithTimeout(2 * time.Second).Should(BeEmpty()) + return nil + } + + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + + }) + + }) + }) diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index ac759ea3c7..1b4544f89b 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -625,19 +625,24 @@ func bridgeForInterface(intfName, nodeName, if err != nil { return nil, fmt.Errorf("gateway accelerated interface %s is not valid: %w", config.Gateway.GatewayAcceleratedInterface, err) } + gwIntf = config.Gateway.GatewayAcceleratedInterface isGWAcclInterface = true klog.Infof("For gateway accelerated interface %s representor: %s", config.Gateway.GatewayAcceleratedInterface, intfRep) + } else { + intfRep, err = getRepresentor(gwIntf) + if err == nil { + isGWAcclInterface = true + } } if isGWAcclInterface { - gatewayAcceleratedInterface := config.Gateway.GatewayAcceleratedInterface bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfRep) if err != nil { return nil, fmt.Errorf("failed to find bridge that has port %s: %w", intfRep, err) } - link, err := util.GetNetLinkOps().LinkByName(gatewayAcceleratedInterface) + link, err := util.GetNetLinkOps().LinkByName(gwIntf) if err != nil { - return nil, fmt.Errorf("failed to get netdevice link for %s: %w", gatewayAcceleratedInterface, err) + return nil, fmt.Errorf("failed to get netdevice link for %s: %w", gwIntf, err) } uplinkName, err := util.GetNicName(bridgeName) if err != nil { @@ -646,7 +651,7 @@ func bridgeForInterface(intfName, nodeName, res.bridgeName = bridgeName res.uplinkName = uplinkName res.gwIfaceRep = intfRep - res.gwIface = gatewayAcceleratedInterface + res.gwIface = gwIntf res.macAddress = link.Attrs().HardwareAddr } else if bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfName); err == nil { // This is an OVS bridge's internal port diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index a4778f417f..ddde471afa 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -1772,8 +1772,7 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("LinkByName", lnkAttr.Name).Return(lnk, nil) 
netlinkMock.On("LinkByIndex", lnkAttr.Index).Return(lnk, nil) netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) - netlinkMock.On("RouteAdd", mock.Anything, mock.Anything, mock.Anything).Return(nil) + netlinkMock.On("RouteReplace", mock.Anything, mock.Anything, mock.Anything).Return(nil) wg := &sync.WaitGroup{} rm := routemanager.NewController() util.SetNetLinkOpMockInst(netlinkMock) @@ -1802,15 +1801,6 @@ var _ = Describe("Gateway unit tests", func() { Name: "ens1f0", Index: 5, } - previousRoute := &netlink.Route{ - Dst: ipnet, - LinkIndex: 5, - Scope: netlink.SCOPE_UNIVERSE, - Gw: gwIPs[0], - MTU: config.Default.MTU - 100, - Src: srcIP, - } - expectedRoute := &netlink.Route{ Dst: ipnet, LinkIndex: 5, @@ -1818,13 +1808,13 @@ var _ = Describe("Gateway unit tests", func() { Gw: gwIPs[0], MTU: config.Default.MTU, Src: srcIP, + Table: syscall.RT_TABLE_MAIN, } lnk.On("Attrs").Return(lnkAttr) netlinkMock.On("LinkByName", lnkAttr.Name).Return(lnk, nil) netlinkMock.On("LinkByIndex", lnkAttr.Index).Return(lnk, nil) netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) - netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*previousRoute}, nil) netlinkMock.On("RouteReplace", expectedRoute).Return(nil) wg := &sync.WaitGroup{} rm := routemanager.NewController() diff --git a/go-controller/pkg/node/gateway_nftables.go b/go-controller/pkg/node/gateway_nftables.go index 78f4351fff..6e341466ab 100644 --- a/go-controller/pkg/node/gateway_nftables.go +++ b/go-controller/pkg/node/gateway_nftables.go @@ -122,7 +122,12 @@ func recreateNFTSet(setName string, keepNFTElems []*knftables.Element) error { tx.Add(elem) } } - return nft.Run(context.TODO(), tx) + err = nft.Run(context.TODO(), tx) + // no error if set is not created and we desire zero NFT elements + if knftables.IsNotFound(err) && len(keepNFTElems) == 0 { + return nil + } + return err } func recreateNFTMap(mapName string, keepNFTElems []*knftables.Element) error { @@ -139,7 +144,12 @@ func recreateNFTMap(mapName string, keepNFTElems []*knftables.Element) error { tx.Add(elem) } } - return nft.Run(context.TODO(), tx) + err = nft.Run(context.TODO(), tx) + // no error if set is not created and we desire zero NFT elements + if knftables.IsNotFound(err) && len(keepNFTElems) == 0 { + return nil + } + return err } // getGatewayNFTRules returns nftables rules for service. This must be used in conjunction diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 4f8d4bfea7..bcfa83718b 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -44,6 +44,9 @@ const ( // bridge to move packets between host and external for etp=local traffic. // The hex number 0xe745ecf105, represents etp(e74)-service(5ec)-flows which makes it easier for debugging. etpSvcOpenFlowCookie = "0xe745ecf105" + // pmtudOpenFlowCookie identifies the flows used to drop ICMP type (3) destination unreachable, + // fragmentation-needed (4) + pmtudOpenFlowCookie = "0x0304" // ovsLocalPort is the name of the OVS bridge local port ovsLocalPort = "LOCAL" // ctMarkOVN is the conntrack mark value for OVN traffic @@ -89,6 +92,10 @@ const ( // to the appropriate network. 
nftablesUDNMarkExternalIPsV4Map = "udn-mark-external-ips-v4" nftablesUDNMarkExternalIPsV6Map = "udn-mark-external-ips-v6" + + // outputPortDrop is used to signify that there is no output port for an openflow action and the + // rendered action should result in a drop + outputPortDrop = "output-port-drop" ) // configureUDNServicesNFTables configures the nftables chains, rules, and verdict maps @@ -415,7 +422,7 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *corev1.Service, netI ipPrefix = "ipv6" } // table 2, user-defined network host -> OVN towards default cluster network services - defaultNetConfig := npw.ofm.defaultBridge.getActiveNetworkBridgeConfig(types.DefaultNetworkName) + defaultNetConfig := npw.ofm.defaultBridge.getActiveNetworkBridgeConfigCopy(types.DefaultNetworkName) // sample flow: cookie=0xdeff105, duration=2319.685s, table=2, n_packets=496, n_bytes=67111, priority=300, // ip,nw_dst=10.96.0.1 actions=mod_dl_dst:02:42:ac:12:00:03,output:"patch-breth0_ov" // This flow is used for UDNs and advertised UDNs to be able to reach kapi and dns services alone on default network @@ -530,7 +537,7 @@ func (npw *nodePortWatcher) createLbAndExternalSvcFlows(service *corev1.Service, etpSvcOpenFlowCookie, npw.ofportPhys)) } else if config.Gateway.Mode == config.GatewayModeShared { // add the ICMP Fragmentation flow for shared gateway mode. - icmpFlow := npw.generateICMPFragmentationFlow(nwDst, externalIPOrLBIngressIP, netConfig.ofPortPatch, cookie) + icmpFlow := generateICMPFragmentationFlow(externalIPOrLBIngressIP, netConfig.ofPortPatch, npw.ofportPhys, cookie, 110) externalIPFlows = append(externalIPFlows, icmpFlow) // case2 (see function description for details) externalIPFlows = append(externalIPFlows, @@ -596,20 +603,28 @@ func (npw *nodePortWatcher) generateARPBypassFlow(ofPorts []string, ofPortPatch, return arpFlow } -func (npw *nodePortWatcher) generateICMPFragmentationFlow(nwDst, ipAddr string, ofPortPatch, cookie string) string { +func generateICMPFragmentationFlow(ipAddr, outputPort, inPort, cookie string, priority int) string { // we send any ICMP destination unreachable, fragmentation needed to the OVN pipeline too so that // path MTU discovery continues to work. icmpMatch := "icmp" icmpType := 3 icmpCode := 4 + nwDst := "nw_dst" if utilnet.IsIPv6String(ipAddr) { icmpMatch = "icmp6" icmpType = 2 icmpCode = 0 + nwDst = "ipv6_dst" + } + + action := fmt.Sprintf("output:%s", outputPort) + if outputPort == outputPortDrop { + action = "drop" } - icmpFragmentationFlow := fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, icmp_type=%d, "+ - "icmp_code=%d, actions=output:%s", - cookie, npw.ofportPhys, icmpMatch, nwDst, ipAddr, icmpType, icmpCode, ofPortPatch) + + icmpFragmentationFlow := fmt.Sprintf("cookie=%s, priority=%d, in_port=%s, %s, %s=%s, icmp_type=%d, "+ + "icmp_code=%d, actions=%s", + cookie, priority, inPort, icmpMatch, nwDst, ipAddr, icmpType, icmpCode, action) return icmpFragmentationFlow } @@ -1023,6 +1038,10 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { } netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) + // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted. 
+ if util.IsInvalidPrimaryNetworkError(err) { + continue + } if err != nil { errors = append(errors, err) continue @@ -2215,6 +2234,21 @@ func commonFlows(hostSubnets []*net.IPNet, bridge *bridgeConfiguration) ([]strin return dftFlows, nil } +func pmtudDropFlows(bridge *bridgeConfiguration, ipAddrs []string) []string { + var flows []string + if config.Gateway.Mode != config.GatewayModeShared { + return nil + } + for _, addr := range ipAddrs { + for _, netConfig := range bridge.patchedNetConfigs() { + flows = append(flows, + generateICMPFragmentationFlow(addr, outputPortDrop, netConfig.ofPortPatch, pmtudOpenFlowCookie, 700)) + } + } + + return flows +} + // ovnToHostNetworkNormalActionFlows returns the flows that allow IP{v4,v6} traffic from the OVN network to the host network // when the destination is on the same node as the sender. This is necessary for pods in the default network to reach // localnet pods on the same node, when the localnet is mapped to breth0. The expected srcMAC is the MAC address of breth0 diff --git a/go-controller/pkg/node/gateway_udn.go b/go-controller/pkg/node/gateway_udn.go index 7dad83af98..7b755806fd 100644 --- a/go-controller/pkg/node/gateway_udn.go +++ b/go-controller/pkg/node/gateway_udn.go @@ -146,12 +146,18 @@ func (b *bridgeConfiguration) delNetworkBridgeConfig(nInfo util.NetInfo) { delete(b.netConfig, nInfo.GetNetworkName()) } -// getActiveNetworkBridgeConfig returns a shallow copy of the network configuration corresponding to the +func (b *bridgeConfiguration) getNetworkBridgeConfig(networkName string) *bridgeUDNConfiguration { + b.Lock() + defer b.Unlock() + return b.netConfig[networkName] +} + +// getActiveNetworkBridgeConfigCopy returns a shallow copy of the network configuration corresponding to the // provided netInfo. // // NOTE: if the network configuration can't be found or if the network is not patched by OVN // yet this returns nil. 
-func (b *bridgeConfiguration) getActiveNetworkBridgeConfig(networkName string) *bridgeUDNConfiguration { +func (b *bridgeConfiguration) getActiveNetworkBridgeConfigCopy(networkName string) *bridgeUDNConfiguration { b.Lock() defer b.Unlock() @@ -917,9 +923,18 @@ func (udng *UserDefinedNetworkGateway) Reconcile() { func (udng *UserDefinedNetworkGateway) doReconcile() error { klog.Infof("Reconciling gateway with updates for UDN %s", udng.GetNetworkName()) + // shouldn't happen + if udng.openflowManager == nil || udng.openflowManager.defaultBridge == nil { + return fmt.Errorf("openflow manager with default bridge configuration has not been provided for network %s", udng.GetNetworkName()) + } + // update bridge configuration isNetworkAdvertised := util.IsPodNetworkAdvertisedAtNode(udng.NetInfo, udng.node.Name) - udng.openflowManager.defaultBridge.netConfig[udng.GetNetworkName()].advertised.Store(isNetworkAdvertised) + netConfig := udng.openflowManager.defaultBridge.getNetworkBridgeConfig(udng.GetNetworkName()) + if netConfig == nil { + return fmt.Errorf("missing bridge configuration for network %s", udng.GetNetworkName()) + } + netConfig.advertised.Store(isNetworkAdvertised) if err := udng.updateUDNVRFIPRules(isNetworkAdvertised); err != nil { return fmt.Errorf("error while updating ip rule for UDN %s: %s", udng.GetNetworkName(), err) diff --git a/go-controller/pkg/node/gateway_udn_test.go b/go-controller/pkg/node/gateway_udn_test.go index 7312521ab7..34673210ca 100644 --- a/go-controller/pkg/node/gateway_udn_test.go +++ b/go-controller/pkg/node/gateway_udn_test.go @@ -40,6 +40,7 @@ import ( ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" coreinformermocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1" v1mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1" + fakenetworkmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -1554,6 +1555,34 @@ var _ = Describe("UserDefinedNetworkGateway", func() { Expect(err).NotTo(HaveOccurred()) Expect(fexec.CalledMatchesExpected()).To(BeTrue(), fexec.ErrorDesc) }) + + It("should sync node port watcher successfully if a namespaces network is invalid", func() { + // create new gateway, add ns with primary UDN, pod, expose pod via Node port service, delete pod, delete udn, ensure sync should succeeds + namespace := util.NewNamespace("udn") + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + namespace.Labels[types.RequiredUDNNamespaceLabel] = "" + service := newService("udn-svc", namespace.Name, "10.96.0.10", []corev1.ServicePort{{NodePort: int32(30300), + Protocol: corev1.ProtocolTCP, Port: int32(8080)}}, corev1.ServiceTypeNodePort, []string{}, corev1.ServiceStatus{}, + true, false) + fakeClient := util.GetOVNClientset(service, namespace) + wf, err := factory.NewNodeWatchFactory(fakeClient.GetNodeClientset(), "node") + Expect(err).ToNot(HaveOccurred(), "must get new node watch factory") + Expect(wf.Start()).NotTo(HaveOccurred(), "must start Node watch factory") + defer func() { + wf.Shutdown() + }() + iptV4, iptV6 := util.SetFakeIPTablesHelpers() + nodenft.SetFakeNFTablesHelper() + fNPW := initFakeNodePortWatcher(iptV4, iptV6) + fNPW.watchFactory = wf + // in-order to simulate a namespace with an Invalid UDN (when 
GetActiveNamespace is called), we add an entry + // to the fake network manager but no specified network. GetActiveNetwork will return the appropriate error of Invalid Network for namespace. + // network manager may have a different implementation that fake network manager but both will return the same error. + fNPW.networkManager = &fakenetworkmanager.FakeNetworkManager{PrimaryNetworks: map[string]util.NetInfo{namespace.Name: nil}} + services := append([]interface{}{}, service) + Expect(fNPW.SyncServices(services)).NotTo(HaveOccurred(), "must sync services") + }) }) func TestConstructUDNVRFIPRules(t *testing.T) { diff --git a/go-controller/pkg/node/linkmanager/link_network_manager.go b/go-controller/pkg/node/linkmanager/link_network_manager.go index 469de89852..ce047965e5 100644 --- a/go-controller/pkg/node/linkmanager/link_network_manager.go +++ b/go-controller/pkg/node/linkmanager/link_network_manager.go @@ -22,10 +22,11 @@ type LinkAddress struct { } type Controller struct { - mu *sync.Mutex - name string - ipv4Enabled bool - ipv6Enabled bool + mu *sync.Mutex + name string + ipv4Enabled bool + ipv6Enabled bool + // map of link name to address store map[string][]netlink.Addr linkHandlerFunc func(link netlink.Link) error } diff --git a/go-controller/pkg/node/managementport/port_linux_test.go b/go-controller/pkg/node/managementport/port_linux_test.go index 30bc33b78b..d6d99d7577 100644 --- a/go-controller/pkg/node/managementport/port_linux_test.go +++ b/go-controller/pkg/node/managementport/port_linux_test.go @@ -29,6 +29,7 @@ import ( egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressipv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + networkqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" @@ -273,7 +274,7 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net Expect(err).NotTo(HaveOccurred()) kubeInterface := &kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, ANPClient: anpfake.NewSimpleClientset(), EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, - EgressServiceClient: &egressservicefake.Clientset{}} + EgressServiceClient: &egressservicefake.Clientset{}, NetworkQoSClient: &networkqosfake.Clientset{}} nodeAnnotator := kube.NewNodeAnnotator(kubeInterface, existingNode.Name) watchFactory, err := factory.NewNodeWatchFactory(fakeNodeClient, nodeName) Expect(err).NotTo(HaveOccurred()) @@ -374,7 +375,7 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns. 
_, err = config.InitConfig(ctx, fexec, nil) Expect(err).NotTo(HaveOccurred()) - kubeInterface := &kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, ANPClient: anpfake.NewSimpleClientset(), EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}} + kubeInterface := &kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, ANPClient: anpfake.NewSimpleClientset(), EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}, NetworkQoSClient: &networkqosfake.Clientset{}} nodeAnnotator := kube.NewNodeAnnotator(kubeInterface, existingNode.Name) watchFactory, err := factory.NewNodeWatchFactory(fakeNodeClient, nodeName) Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index 7f07b25c9f..a0c5ab21e8 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -260,7 +260,7 @@ func (c *addressManager) handleNodePrimaryAddrChange() { } if nodePrimaryAddrChanged && config.Default.EncapIP == "" { klog.Infof("Node primary address changed to %v. Updating OVN encap IP.", c.nodePrimaryAddr) - updateOVNEncapIPAndReconnect(c.nodePrimaryAddr) + c.updateOVNEncapIPAndReconnect(c.nodePrimaryAddr) } } @@ -544,7 +544,7 @@ func (c *addressManager) getPrimaryHostEgressIPs() (sets.Set[string], error) { } // updateOVNEncapIPAndReconnect updates encap IP to OVS when the node primary IP changed. -func updateOVNEncapIPAndReconnect(newIP net.IP) { +func (c *addressManager) updateOVNEncapIPAndReconnect(newIP net.IP) { checkCmd := []string{ "get", "Open_vSwitch", @@ -584,6 +584,18 @@ func updateOVNEncapIPAndReconnect(newIP net.IP) { klog.Errorf("Failed to exit ovn-controller %v %q", err, stderr) return } + + // Update node-encap-ips annotation + encapIPList := sets.New[string](config.Default.EffectiveEncapIP) + if err := util.SetNodeEncapIPs(c.nodeAnnotator, encapIPList); err != nil { + klog.Errorf("Failed to set node-encap-ips annotation for node %s: %v", c.nodeName, err) + return + } + + if err := c.nodeAnnotator.Run(); err != nil { + klog.Errorf("Failed to set node %s annotations: %v", c.nodeName, err) + return + } } func getSupportedIPFamily() int { diff --git a/go-controller/pkg/node/node_nftables.go b/go-controller/pkg/node/node_nftables.go new file mode 100644 index 0000000000..e52a8970a4 --- /dev/null +++ b/go-controller/pkg/node/node_nftables.go @@ -0,0 +1,100 @@ +package node + +import ( + "context" + "fmt" + + "sigs.k8s.io/knftables" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +const nftPMTUDChain = "no-pmtud" + +// setupPMTUDNFTSets sets up the NFT sets that contain remote Kubernetes node IPs +func setupPMTUDNFTSets() error { + nft, err := nodenft.GetNFTablesHelper() + if err != nil { + return fmt.Errorf("failed to get nftables helper: %w", err) + } + + tx := nft.NewTransaction() + tx.Add(&knftables.Set{ + Name: types.NFTNoPMTUDRemoteNodeIPsv4, + Comment: knftables.PtrTo("Block egress ICMP needs frag to remote Kubernetes nodes"), + Type: "ipv4_addr", + }) + tx.Add(&knftables.Set{ + Name: types.NFTNoPMTUDRemoteNodeIPsv6, + Comment: knftables.PtrTo("Block egress ICMPv6 packet too big to remote Kubernetes nodes"), + 
Type: "ipv6_addr", + }) + + err = nft.Run(context.TODO(), tx) + if err != nil { + return fmt.Errorf("could not add nftables sets for pmtud blocking: %v", err) + } + return nil +} + +// setupPMTUDNFTChain sets up the chain and rules to block PMTUD packets from being sent to k8s nodes +// Relies on the sets from setupPMTUDNFTSets. +func setupPMTUDNFTChain() error { + counterIfDebug := "" + if config.Logging.Level > 4 { + counterIfDebug = "counter" + } + + nft, err := nodenft.GetNFTablesHelper() + if err != nil { + return fmt.Errorf("failed to get nftables helper") + } + + tx := nft.NewTransaction() + tx.Add(&knftables.Chain{ + Name: nftPMTUDChain, + Comment: knftables.PtrTo("Block egress needs frag/packet too big to remote k8s nodes"), + Type: knftables.PtrTo(knftables.FilterType), + Hook: knftables.PtrTo(knftables.OutputHook), + Priority: knftables.PtrTo(knftables.FilterPriority), + }) + + tx.Flush(&knftables.Chain{ + Name: nftPMTUDChain, + }) + if config.IPv4Mode { + tx.Add(&knftables.Rule{ + Chain: nftPMTUDChain, + Rule: knftables.Concat( + "ip daddr @"+types.NFTNoPMTUDRemoteNodeIPsv4, + "meta l4proto icmp", + "icmp type 3", // type 3 == Destination Unreachable + "icmp code 4", // code 4 indicates fragmentation needed + counterIfDebug, + "drop", + ), + }) + } + + if config.IPv6Mode { + tx.Add(&knftables.Rule{ + Chain: nftPMTUDChain, // your egress chain for IPv6 traffic + Rule: knftables.Concat( + "meta l4proto icmpv6", // match on ICMPv6 packets + "icmpv6 type 2", // type 2 == Packet Too Big (PMTUD) + "icmpv6 code 0", // code 0 for that message + "ip6 daddr @"+types.NFTNoPMTUDRemoteNodeIPsv6, + counterIfDebug, + "drop", // drop the packet + ), + }) + } + + err = nft.Run(context.TODO(), tx) + if err != nil { + return fmt.Errorf("could not update nftables rule for PMTUD: %v", err) + } + return nil +} diff --git a/go-controller/pkg/node/obj_retry_node.go b/go-controller/pkg/node/obj_retry_node.go index a930609ef2..148bb3cc40 100644 --- a/go-controller/pkg/node/obj_retry_node.go +++ b/go-controller/pkg/node/obj_retry_node.go @@ -2,6 +2,7 @@ package node import ( "fmt" + "net" "reflect" corev1 "k8s.io/api/core/v1" @@ -70,7 +71,8 @@ func (nc *DefaultNodeNetworkController) newRetryFrameworkNode(objectType reflect func hasResourceAnUpdateFunc(objType reflect.Type) bool { switch objType { case factory.NamespaceExGwType, - factory.EndpointSliceForStaleConntrackRemovalType: + factory.EndpointSliceForStaleConntrackRemovalType, + factory.NodeType: return true } return false @@ -80,7 +82,8 @@ func hasResourceAnUpdateFunc(objType reflect.Type) bool { func needsUpdateDuringRetry(objType reflect.Type) bool { switch objType { case factory.NamespaceExGwType, - factory.EndpointSliceForStaleConntrackRemovalType: + factory.EndpointSliceForStaleConntrackRemovalType, + factory.NodeType: return true } return false @@ -109,6 +112,17 @@ func (h *nodeEventHandler) AreResourcesEqual(obj1, obj2 interface{}) (bool, erro // always run update code return false, nil + case factory.NodeType: + node1, ok := obj1.(*corev1.Node) + if !ok { + return false, fmt.Errorf("could not cast obj1 of type %T to *kapi.Node", obj1) + } + node2, ok := obj2.(*corev1.Node) + if !ok { + return false, fmt.Errorf("could not cast obj2 of type %T to *kapi.Node", obj2) + } + return reflect.DeepEqual(node1.Status.Addresses, node2.Status.Addresses), nil + default: return false, fmt.Errorf("no object comparison for type %s", h.objType) } @@ -135,6 +149,9 @@ func (h *nodeEventHandler) GetResourceFromInformerCache(key string) (interface{} case 
factory.EndpointSliceForStaleConntrackRemovalType: obj, err = h.nc.watchFactory.GetEndpointSlice(namespace, name) + case factory.NodeType: + obj, err = h.nc.watchFactory.GetNode(name) + default: err = fmt.Errorf("object type %s not supported, cannot retrieve it from informers cache", h.objType) @@ -147,13 +164,21 @@ func (h *nodeEventHandler) GetResourceFromInformerCache(key string) (interface{} // the function was executed from iterateRetryResources, AddResource adds the // specified object to the cluster according to its type and returns the error, // if any, yielded during object creation. -func (h *nodeEventHandler) AddResource(_ interface{}, _ bool) error { +func (h *nodeEventHandler) AddResource(obj interface{}, _ bool) error { switch h.objType { case factory.NamespaceExGwType, factory.EndpointSliceForStaleConntrackRemovalType: // no action needed upon add event return nil + case factory.NodeType: + node := obj.(*corev1.Node) + // if it's our node that is changing, then nothing to do as we dont add our own IP to the nftables rules + if node.Name == h.nc.name { + return nil + } + return h.nc.addOrUpdateNode(node) + default: return fmt.Errorf("no add function for object type %s", h.objType) } @@ -187,6 +212,46 @@ func (h *nodeEventHandler) UpdateResource(oldObj, newObj interface{}, _ bool) er return h.nc.reconcileConntrackUponEndpointSliceEvents( oldEndpointSlice, newEndpointSlice) + case factory.NodeType: + oldNode := oldObj.(*corev1.Node) + newNode := newObj.(*corev1.Node) + + // if it's our node that is changing, then nothing to do as we dont add our own IP to the nftables rules + if newNode.Name == h.nc.name { + return nil + } + + // remote node that is changing + ipsToKeep := map[string]bool{} + for _, address := range newNode.Status.Addresses { + if address.Type != corev1.NodeInternalIP { + continue + } + nodeIP := net.ParseIP(address.Address) + if nodeIP == nil { + continue + } + ipsToKeep[nodeIP.String()] = true + } + ipsToRemove := make([]net.IP, 0) + for _, address := range oldNode.Status.Addresses { + if address.Type != corev1.NodeInternalIP { + continue + } + nodeIP := net.ParseIP(address.Address) + if nodeIP == nil { + continue + } + if _, exists := ipsToKeep[nodeIP.String()]; !exists { + ipsToRemove = append(ipsToRemove, nodeIP) + } + } + + if err := removePMTUDNodeNFTRules(ipsToRemove); err != nil { + return fmt.Errorf("error removing node %q stale NFT rules during update: %w", oldNode.Name, err) + } + return h.nc.addOrUpdateNode(newNode) + default: return fmt.Errorf("no update function for object type %s", h.objType) } @@ -206,6 +271,10 @@ func (h *nodeEventHandler) DeleteResource(obj, _ interface{}) error { endpointslice := obj.(*discovery.EndpointSlice) return h.nc.reconcileConntrackUponEndpointSliceEvents(endpointslice, nil) + case factory.NodeType: + h.nc.deleteNode(obj.(*corev1.Node)) + return nil + default: return fmt.Errorf("no delete function for object type %s", h.objType) } @@ -224,6 +293,8 @@ func (h *nodeEventHandler) SyncFunc(objs []interface{}) error { factory.EndpointSliceForStaleConntrackRemovalType: // no sync needed syncFunc = nil + case factory.NodeType: + syncFunc = h.nc.syncNodes default: return fmt.Errorf("no sync function for object type %s", h.objType) diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go index 8f9d3f112a..5fa7d77865 100644 --- a/go-controller/pkg/node/openflow_manager.go +++ b/go-controller/pkg/node/openflow_manager.go @@ -59,7 +59,7 @@ func (c *openflowManager) delNetwork(nInfo 
util.NetInfo) { } func (c *openflowManager) getActiveNetwork(nInfo util.NetInfo) *bridgeUDNConfiguration { - return c.defaultBridge.getActiveNetworkBridgeConfig(nInfo.GetNetworkName()) + return c.defaultBridge.getActiveNetworkBridgeConfigCopy(nInfo.GetNetworkName()) } // END UDN UTILs @@ -210,6 +210,21 @@ func (c *openflowManager) Run(stopChan <-chan struct{}, doneWg *sync.WaitGroup) }() } +func (c *openflowManager) updateBridgePMTUDFlowCache(key string, ipAddrs []string) { + // protect defaultBridge config from being updated by gw.nodeIPManager + c.defaultBridge.Lock() + defer c.defaultBridge.Unlock() + + dftFlows := pmtudDropFlows(c.defaultBridge, ipAddrs) + c.updateFlowCacheEntry(key, dftFlows) + if c.externalGatewayBridge != nil { + c.externalGatewayBridge.Lock() + defer c.externalGatewayBridge.Unlock() + exGWBridgeDftFlows := pmtudDropFlows(c.externalGatewayBridge, ipAddrs) + c.updateExBridgeFlowCacheEntry(key, exGWBridgeDftFlows) + } +} + // updateBridgeFlowCache generates the "static" per-bridge flows // note: this is shared between shared and local gateway modes func (c *openflowManager) updateBridgeFlowCache(hostIPs []net.IP, hostSubnets []*net.IPNet) error { diff --git a/go-controller/pkg/node/routemanager/route_manager.go b/go-controller/pkg/node/routemanager/route_manager.go index 844edc2a85..e3480e4ca8 100644 --- a/go-controller/pkg/node/routemanager/route_manager.go +++ b/go-controller/pkg/node/routemanager/route_manager.go @@ -3,6 +3,8 @@ package routemanager import ( "fmt" "net" + "slices" + "strings" "sync" "time" @@ -10,27 +12,31 @@ import ( "golang.org/x/sys/unix" "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) -// MainTableID is the default routing table. IPRoute2 names the default routing table as 'main' -const MainTableID = 254 +// key of a managed route, only one route allowed with the same key +type key struct { + dst string + table int + priority int +} type Controller struct { *sync.Mutex - store map[int][]netlink.Route // key is link index + store map[key]*netlink.Route } -// NewController manages routes which include adding and deletion of routes. It also manages restoration of managed routes. -// Begin managing routes by calling Run() to start the manager. -// Routes should be added via add(route) and deletion via del(route) functions only. -// All other functions are used internally. +// NewController manages routes which include adding and deletion of routes. It +// also manages restoration of managed routes. Begin managing routes by calling +// Run() to start the manager. Routes should be added via Add(route) and +// deletion via Del(route) functions only. All other functions are used +// internally. 
func NewController() *Controller { return &Controller{ Mutex: &sync.Mutex{}, - store: make(map[int][]netlink.Route), + store: make(map[key]*netlink.Route), } } @@ -51,105 +57,84 @@ func (c *Controller) Run(stopCh <-chan struct{}, syncPeriod time.Duration) { return case newRouteEvent, ok := <-routeEventCh: if !ok { - klog.Info("Route Manager: failed to read netlink route event - resubscribing") + klog.Warning("Route Manager: netlink route events subscription lost, resubscribing...") subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) continue } if err = c.processNetlinkEvent(newRouteEvent); err != nil { // TODO: make util.GetNetLinkOps().IsLinkNotFoundError(err) smarter to unwrap error // and use it here to log errors that are not IsLinkNotFoundError - klog.V(5).Infof("Route Manager: failed to process route update event (%s): %v", newRouteEvent.String(), err) + klog.Errorf("Route Manager: failed to process route update event %v: %v", newRouteEvent, err) } case <-ticker.C: if !subscribed { - klog.Info("Route Manager: netlink route events aren't subscribed - resubscribing") + klog.Warning("Route Manager: netlink route events subscription lost, resubscribing...") subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) } c.sync() + ticker.Reset(syncPeriod) } } } -// Add submits a request to add a route +// Add submits a request to add a route, instructing the kernel to replace a +// previously existing route. Route manager will periodically sync to ensure the +// provided route is installed and that no other routes with the same priority, +// prefix and table tuple exist. Thus note that if the provided route is not a +// replacement for an existing route, multiple routes with the same priority, +// prefix and table tuple may exist until sync happens. func (c *Controller) Add(r netlink.Route) error { - if err := c.addRoute(r); err != nil { - return fmt.Errorf("route manager: failed to add route (%s): %w", r.String(), err) - } - return nil + c.Lock() + defer c.Unlock() + return c.addRoute(&r) } -// Del submits a request to del a route +// Del submits a request to delete and forget a route. func (c *Controller) Del(r netlink.Route) error { - if err := c.delRoute(r); err != nil { - return fmt.Errorf("route manager: failed to delete route (%s): %v", r.String(), err) - } - return nil + c.Lock() + defer c.Unlock() + return c.delRoute(&r) } // addRoute attempts to add the route and returns with error // if it fails to do so. -func (c *Controller) addRoute(r netlink.Route) error { - c.Lock() - defer c.Unlock() - klog.Infof("Route Manager: attempting to add route: %s", r.String()) - // If table is unspecified aka 0, then set it to main table ID. This is done by default when adding a route. - // Set it explicitly to aid comparison of routes. 
- if r.Table == 0 { - r.Table = MainTableID +func (c *Controller) addRoute(r *netlink.Route) error { + r, err := validateAndNormalizeRoute(r) + if err != nil { + return err } - if addedToStore := c.addRouteToStore(r); !addedToStore { + if c.hasRouteInStore(r) { // already managed - nothing to do return nil } - if r.LinkIndex != 0 { - _, err := util.GetNetLinkOps().LinkByIndex(r.LinkIndex) - if err != nil { - return fmt.Errorf("failed to apply route (%s) because unable to get link: %v", r.String(), err) - } - } - if err := c.applyRoute(r.LinkIndex, r.Gw, r.Dst, r.MTU, r.Src, r.Table, r.Priority, r.Type, r.Scope); err != nil { - return fmt.Errorf("failed to apply route (%s): %v", r.String(), err) + err = c.netlinkAddRoute(r) + if err != nil { + return err } - klog.Infof("Route Manager: completed adding route: %s", r.String()) + c.addRouteToStore(r) return nil } // delRoute attempts to remove the route and returns with error // if it fails to do so. -func (c *Controller) delRoute(r netlink.Route) error { - c.Lock() - defer c.Unlock() - klog.Infof("Route Manager: attempting to delete route: %s", r.String()) - if r.LinkIndex != 0 { - _, err := util.GetNetLinkOps().LinkByIndex(r.LinkIndex) - if err != nil { - if util.GetNetLinkOps().IsLinkNotFoundError(err) { - delete(c.store, r.LinkIndex) - return nil - } - return fmt.Errorf("failed to delete route (%s) because unable to get link: %v", r.String(), err) - } - } - if err := c.netlinkDelRoute(r.LinkIndex, r.Dst, r.Table); err != nil { - return fmt.Errorf("failed to delete route (%s): %v", r.String(), err) +func (c *Controller) delRoute(r *netlink.Route) error { + r, err := validateAndNormalizeRoute(r) + if err != nil { + return err } - managedRoutes, ok := c.store[r.LinkIndex] - if !ok { - return nil + err = c.netlinkDelRoute(r) + if err != nil { + return err } - // remove route from existing routes - managedRoutesTemp := make([]netlink.Route, 0, len(managedRoutes)) - for _, managedRoute := range managedRoutes { - if !RoutePartiallyEqual(managedRoute, r) { - managedRoutesTemp = append(managedRoutesTemp, managedRoute) + // also remove the route we had in store if different + o := c.store[keyFromNetlink(r)] + if o != nil && !util.RouteEqual(r, o) { + err = c.netlinkDelRoute(o) + if err != nil { + return err } } - if len(managedRoutesTemp) == 0 { - delete(c.store, r.LinkIndex) - } else { - c.store[r.LinkIndex] = managedRoutesTemp - } - klog.Infof("Route Manager: deletion of routes for link complete: %s", r.String()) + c.removeRouteFromStore(r) return nil } @@ -158,225 +143,217 @@ func (c *Controller) delRoute(r netlink.Route) error { func (c *Controller) processNetlinkEvent(ru netlink.RouteUpdate) error { c.Lock() defer c.Unlock() - if ru.Type == unix.RTM_NEWROUTE { - // An event resulting from `ip route change` will be seen as type RTM_NEWROUTE event and therefore this function will only - // log the changes and not attempt to restore the change. This will be accomplished by the sync function. 
- klog.Infof("Route Manager: netlink route addition event: %q", ru.String()) - return nil - } - if ru.Type != unix.RTM_DELROUTE { + r := c.store[keyFromNetlink(&ru.Route)] + if r == nil { return nil } - klog.V(5).Infof("Route Manager: netlink route deletion event: %q", ru.String()) - managedRoutes, ok := c.store[ru.LinkIndex] - if !ok { - // we don't manage this interface - return nil - } - for _, managedRoute := range managedRoutes { - if RoutePartiallyEqual(managedRoute, ru.Route) { - if managedRoute.LinkIndex != 0 { - _, err := util.GetNetLinkOps().LinkByIndex(managedRoute.LinkIndex) - if err != nil { - klog.Errorf("Route Manager: failed to restore route because unable to get link by index %d: %v", managedRoute.LinkIndex, err) - continue - } - } - if err := c.applyRoute(managedRoute.LinkIndex, managedRoute.Gw, managedRoute.Dst, managedRoute.MTU, managedRoute.Src, managedRoute.Table, - managedRoute.Priority, managedRoute.Type, managedRoute.Scope); err != nil { - klog.Errorf("Route Manager: failed to apply route (%s): %v", managedRoute.String(), err) - } - } + if ru.Type == unix.RTM_DELROUTE || !routePartiallyEqualWantedToExisting(r, &ru.Route) { + return c.netlinkAddRoute(r) } return nil } -func (c *Controller) applyRoute(linkIndex int, gwIP net.IP, subnet *net.IPNet, mtu int, src net.IP, - table, priority, rtype int, scope netlink.Scope) error { - filterRoute, filterMask := filterRouteByDstAndTable(linkIndex, subnet, table) - existingRoutes, err := util.GetNetLinkOps().RouteListFiltered(getNetlinkIPFamily(subnet), filterRoute, filterMask) +func (c *Controller) netlinkAddRoute(r *netlink.Route) error { + err := util.GetNetLinkOps().RouteReplace(r) if err != nil { - return fmt.Errorf("failed to list filtered routes: %v", err) - } - if len(existingRoutes) == 0 { - return c.netlinkAddRoute(linkIndex, gwIP, subnet, mtu, src, table, priority, rtype, scope) - } - netlinkRoute := &existingRoutes[0] - if netlinkRoute.MTU != mtu || !src.Equal(netlinkRoute.Src) || !gwIP.Equal(netlinkRoute.Gw) { - netlinkRoute.MTU = mtu - netlinkRoute.Src = src - netlinkRoute.Gw = gwIP - err = util.GetNetLinkOps().RouteReplace(netlinkRoute) - if err != nil { - return fmt.Errorf("failed to replace route for subnet %s via gateway %s with mtu %d: %v", - subnet.String(), gwIP.String(), mtu, err) - } + return fmt.Errorf("failed to add route %s: %w", r, err) } + klog.V(5).Infof("Route Manager: added route %s", r) return nil } -func (c *Controller) netlinkAddRoute(linkIndex int, gwIP net.IP, subnet *net.IPNet, - mtu int, srcIP net.IP, table, priority, rtype int, scope netlink.Scope) error { - newNlRoute := &netlink.Route{ - Dst: subnet, - LinkIndex: linkIndex, - Scope: netlink.SCOPE_UNIVERSE, - Table: table, - } - if len(gwIP) > 0 { - newNlRoute.Gw = gwIP - } - if len(srcIP) > 0 { - newNlRoute.Src = srcIP - } - if mtu != 0 { - newNlRoute.MTU = mtu - } - if priority != 0 { - newNlRoute.Priority = priority - } - if rtype != 0 { - newNlRoute.Type = rtype - } - if scope != netlink.Scope(0) { - newNlRoute.Scope = scope - } - err := util.GetNetLinkOps().RouteAdd(newNlRoute) - if err != nil { - return fmt.Errorf("failed to add route (linkIndex: %d gw: %v, subnet %v, mtu %d, src IP %v): %v", - newNlRoute.LinkIndex, gwIP, subnet, mtu, srcIP, err) +func (c *Controller) netlinkDelRoute(r *netlink.Route) error { + err := util.GetNetLinkOps().RouteDel(r) + if err != nil && !isRouteNotFoundError(err) { + return fmt.Errorf("failed to delete route %s: %w", r, err) } + klog.V(5).Infof("Route Manager: deleted route %s", r) return nil } 
-func (c *Controller) netlinkDelRoute(linkIndex int, subnet *net.IPNet, table int) error { - if subnet == nil { - return fmt.Errorf("cannot delete route with no valid subnet") - } - filter, mask := filterRouteByDstAndTable(linkIndex, subnet, table) - existingRoutes, err := util.GetNetLinkOps().RouteListFiltered(netlink.FAMILY_ALL, filter, mask) - if err != nil { - return fmt.Errorf("failed to get routes for link %d: %v", linkIndex, err) - } - for _, existingRoute := range existingRoutes { - if err = util.GetNetLinkOps().RouteDel(&existingRoute); err != nil { - return err - } - } - return nil +// addRouteToStore adds routes to the internal cache +// Must be called with the controller locked +func (c *Controller) addRouteToStore(r *netlink.Route) { + route := keyFromNetlink(r) + c.store[route] = r } -// addRouteToStore adds routes to the internal cache +// removeRouteFromStore removes route from the internal cache // Must be called with the controller locked -func (c *Controller) addRouteToStore(r netlink.Route) bool { - existingRoutes, ok := c.store[r.LinkIndex] - if !ok { - c.store[r.LinkIndex] = []netlink.Route{r} - return true +func (c *Controller) removeRouteFromStore(r *netlink.Route) { + delete(c.store, keyFromNetlink(r)) +} + +// hasRouteInStore checks if a route with the same key is stored in the +// internal cache as requested. Must be called with the controller locked +func (c *Controller) hasRouteInStore(r *netlink.Route) bool { + route := c.store[keyFromNetlink(r)] + return route != nil && util.RouteEqual(r, route) +} + +func validateAndNormalizeRoute(r *netlink.Route) (*netlink.Route, error) { + if r == nil { + return nil, fmt.Errorf("nil route provided") } - for _, existingRoute := range existingRoutes { - if RoutePartiallyEqual(existingRoute, r) { - return false - } + if r.Table == unix.RT_TABLE_UNSPEC { + r.Table = unix.RT_TABLE_MAIN } - c.store[r.LinkIndex] = append(existingRoutes, r) - return true + return r, nil } -// sync will iterate through all routes seen on a node and ensure any route manager managed routes are applied. Any additional -// routes for this link are preserved. sync only inspects routes for links which we managed and ignore routes for non-managed links. +func keyFromNetlink(r *netlink.Route) key { + return key{ + dst: r.Dst.String(), + table: r.Table, + priority: r.Priority, + } +} + +// sync will iterate through all routes seen on a node and ensure any route +// manager managed routes are applied. Any conflicting additional routes are +// removed. Other routes are preserved. 
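// For example (illustrative values): if a managed route {Dst: 192.0.2.0/24, Table: 254}
// is missing from the kernel it is re-added; if other routes share its
// (dst, table, priority) key but differ from it, they are deleted first and the
// managed route is then re-applied via route replace.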
func (c *Controller) sync() { c.Lock() defer c.Unlock() - deletedLinkIndexes := make([]int, 0) - for linkIndex, managedRoutes := range c.store { - for _, managedRoute := range managedRoutes { - filterRoute, filterMask := filterRouteByDstAndTable(linkIndex, managedRoute.Dst, managedRoute.Table) - existingRoutes, err := util.GetNetLinkOps().RouteListFiltered(netlink.FAMILY_ALL, filterRoute, filterMask) + + var read, added, deleted int + start := time.Now() + defer func() { + klog.V(5).Infof("Route Manager: synced routes: stored[%d] read[%d] added[%d] deleted[%d], took %s", + len(c.store), + read, + added, + deleted, + time.Since(start), + ) + }() + + // there can be many routes on the system so make sure we list them as few + // times as possible + // note that RouteListFiltered dumps ALL routes, filtering happens on the + // client side + // we need to filter by table without specifying any table to get routes + // form all tables + filter := &netlink.Route{} + mask := netlink.RT_FILTER_TABLE + existing, err := util.GetNetLinkOps().RouteListFiltered(netlink.FAMILY_ALL, filter, mask) + if err != nil { + klog.Errorf("Route Manager: failed to list routes: %v", err) + return + } + read = len(existing) + + existingAndTracked := map[key][]*netlink.Route{} + for _, r := range existing { + key := keyFromNetlink(&r) + wants := c.store[key] + if wants == nil { + continue + } + existingAndTracked[key] = append(existingAndTracked[key], &r) + } + + for key, wants := range c.store { + existing := existingAndTracked[key] + if len(existing) == 1 && routePartiallyEqualWantedToExisting(wants, existing[0]) { + continue + } + // take the safe approach to delete routes before adding ours to make + // sure we don't end up deleting what we shouldn't + // deleting now may cause network blips until we add our route but + // nobody should be manipulating conflicting routes anyway + for _, r := range existing { + err := c.netlinkDelRoute(r) if err != nil { - klog.Errorf("Route Manager: failed to list routes for link %d with route filter %s and mask filter %d: %v", linkIndex, filterRoute.String(), filterMask, err) + klog.Errorf("Route Manager: failed while syncing: %v", err) continue } - var found bool - for _, activeRoute := range existingRoutes { - if RoutePartiallyEqual(activeRoute, managedRoute) { - found = true - break - } - } - if !found { - if managedRoute.LinkIndex != 0 { - _, err := util.GetNetLinkOps().LinkByIndex(managedRoute.LinkIndex) - if err != nil { - if util.GetNetLinkOps().IsLinkNotFoundError(err) { - deletedLinkIndexes = append(deletedLinkIndexes, linkIndex) - } else { - klog.Errorf("Route Manager: failed to apply route (%s) because unable to retrieve associated link: %v", managedRoute.String(), err) - } - continue - } - } - if err := c.applyRoute(managedRoute.LinkIndex, managedRoute.Gw, managedRoute.Dst, managedRoute.MTU, managedRoute.Src, managedRoute.Table, - managedRoute.Priority, managedRoute.Type, managedRoute.Scope); err != nil { - klog.Errorf("Route Manager: failed to apply route (%s): %v", managedRoute.String(), err) - } - } + klog.Warningf("Route Manager: removed unexpected route %s", r) + deleted++ } - } - for _, linkIndex := range deletedLinkIndexes { - klog.Infof("Route Manager: removing all routes associated with link index %d because link deleted", linkIndex) - delete(c.store, linkIndex) + err := c.netlinkAddRoute(wants) + if err != nil { + klog.Errorf("Route Manager: failed while syncing: %v", err) + continue + } + added++ } } -func getNetlinkIPFamily(ipNet *net.IPNet) int { - if 
utilnet.IsIPv6(ipNet.IP) { - return netlink.FAMILY_V6 - } else { - return netlink.FAMILY_V4 +func subscribeNetlinkRouteEvents(stopCh <-chan struct{}) (bool, chan netlink.RouteUpdate) { + routeEventCh := make(chan netlink.RouteUpdate, 20) + if err := netlink.RouteSubscribe(routeEventCh, stopCh); err != nil { + klog.Errorf("Route Manager: failed to subscribe to netlink route events: %v", err) + return false, routeEventCh } + return true, routeEventCh } -func filterRouteByDstAndTable(linkIndex int, subnet *net.IPNet, table int) (*netlink.Route, uint64) { - return &netlink.Route{ - Dst: subnet, - LinkIndex: linkIndex, - Table: table, - }, - netlink.RT_FILTER_DST | netlink.RT_FILTER_OIF | netlink.RT_FILTER_TABLE +func equalOrLeftZero[T comparable](l, r, z T) bool { + return l == z || l == r } -func filterRouteByTable(linkIndex, table int) (*netlink.Route, uint64) { - return &netlink.Route{ - LinkIndex: linkIndex, - Table: table, - }, - netlink.RT_FILTER_OIF | netlink.RT_FILTER_TABLE +func equalOrLeftZeroFunc[T any](eq func(l, r T) bool, l, r, z T) bool { + return eq(l, z) || eq(l, r) } -func subscribeNetlinkRouteEvents(stopCh <-chan struct{}) (bool, chan netlink.RouteUpdate) { - routeEventCh := make(chan netlink.RouteUpdate, 20) - if err := netlink.RouteSubscribe(routeEventCh, stopCh); err != nil { - klog.Errorf("Route Manager: failed to subscribe to netlink route events: %v", err) - return false, routeEventCh +// routePartiallyEqualWantedToExisting compares non zero values of left wanted route with the +// right existing route. The reason for not using the Equal method associated +// with type netlink.Route is because a user will only specify a limited subset +// of fields but when we introspect routes seen on the system, other fields are +// populated by default and therefore won't be equal anymore with user defined +// routes. Also, netlink.Routes Equal method doesn't compare MTU. 
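// For example (illustrative values): a wanted route {Dst: 10.244.0.0/24, Table: 254}
// matches an existing route {Dst: 10.244.0.0/24, Table: 254, LinkIndex: 3,
// Protocol: kernel}, because zero-valued fields on the wanted side are ignored;
// it does not match if a field the wanted route does set differs on the existing
// route, e.g. a different MTU, gateway or source IP.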
+func routePartiallyEqualWantedToExisting(w, e *netlink.Route) bool { + if (w == nil) != (e == nil) { + return false + } + if w == e { + return true } - return true, routeEventCh + // this compares dst, table and priority which must be equal for us + if keyFromNetlink(w) != keyFromNetlink(e) { + return false + } + var z netlink.Route + return equalOrLeftZero(w.LinkIndex, e.LinkIndex, z.LinkIndex) && + equalOrLeftZero(w.ILinkIndex, e.ILinkIndex, z.ILinkIndex) && + equalOrLeftZero(w.Scope, e.Scope, z.Scope) && + equalOrLeftZeroFunc(func(l, r net.IP) bool { return l.Equal(r) }, w.Src, e.Src, z.Src) && + equalOrLeftZeroFunc(func(l, r net.IP) bool { return l.Equal(r) }, w.Gw, e.Gw, z.Gw) && + equalOrLeftZeroFunc( + func(l, r []*netlink.NexthopInfo) bool { + return slices.EqualFunc(l, r, + func(l, r *netlink.NexthopInfo) bool { return l == r || (l != nil && r != nil && l.Equal(*r)) }, + ) + }, w.MultiPath, e.MultiPath, z.MultiPath) && + equalOrLeftZero(w.Protocol, e.Protocol, z.Protocol) && + equalOrLeftZero(w.Family, e.Family, z.Family) && + equalOrLeftZero(w.Type, e.Type, z.Type) && + equalOrLeftZero(w.Tos, e.Tos, z.Tos) && + equalOrLeftZero(w.Flags, e.Flags, z.Flags) && + equalOrLeftZeroFunc(func(l, r *int) bool { return l == r || (l != nil && r != nil && *l == *r) }, w.MPLSDst, e.MPLSDst, z.MPLSDst) && + equalOrLeftZeroFunc(func(l, r netlink.Destination) bool { return l == r || (l != nil && r != nil && l.Equal(r)) }, w.NewDst, e.NewDst, z.NewDst) && + equalOrLeftZeroFunc(func(l, r netlink.Encap) bool { return l == r || (l != nil && r != nil && l.Equal(r)) }, w.Encap, e.Encap, z.Encap) && + equalOrLeftZeroFunc(func(l, r netlink.Destination) bool { return l == r || (l != nil && r != nil && l.Equal(r)) }, w.Via, e.Via, z.Via) && + equalOrLeftZero(w.Realm, e.Realm, z.Realm) && + equalOrLeftZero(w.MTU, e.MTU, z.MTU) && + equalOrLeftZero(w.Window, e.Window, z.Window) && + equalOrLeftZero(w.Rtt, e.Rtt, z.Rtt) && + equalOrLeftZero(w.RttVar, e.RttVar, z.RttVar) && + equalOrLeftZero(w.Ssthresh, e.Ssthresh, z.Ssthresh) && + equalOrLeftZero(w.Cwnd, e.Cwnd, z.Cwnd) && + equalOrLeftZero(w.AdvMSS, e.AdvMSS, z.AdvMSS) && + equalOrLeftZero(w.Reordering, e.Reordering, z.Reordering) && + equalOrLeftZero(w.Hoplimit, e.Hoplimit, z.Hoplimit) && + equalOrLeftZero(w.InitCwnd, e.InitCwnd, z.InitCwnd) && + equalOrLeftZero(w.Features, e.Features, z.Features) && + equalOrLeftZero(w.RtoMin, e.RtoMin, z.RtoMin) && + equalOrLeftZero(w.InitRwnd, e.InitRwnd, z.InitRwnd) && + equalOrLeftZero(w.QuickACK, e.QuickACK, z.QuickACK) && + equalOrLeftZero(w.Congctl, e.Congctl, z.Congctl) && + equalOrLeftZero(w.FastOpenNoCookie, e.FastOpenNoCookie, z.FastOpenNoCookie) } -// RoutePartiallyEqual compares a limited set of route attributes. -// The reason for not using the Equal method associated with type netlink.Route is because a user will only specify a limited -// subset of fields but when we introspect routes seen on the system, other fields are populated by default and therefore -// won't be equal anymore with user defined routes. Compare a limited set of fields that we care about. -// Also, netlink.Routes Equal method doesn't compare MTU. 
-func RoutePartiallyEqual(r, x netlink.Route) bool { - return r.LinkIndex == x.LinkIndex && - util.IsIPNetEqual(r.Dst, x.Dst) && - r.Src.Equal(x.Src) && - r.Gw.Equal(x.Gw) && - r.Table == x.Table && - r.Flags == x.Flags && - r.MTU == x.MTU && - r.Type == x.Type && - r.Priority == x.Priority && - r.Scope == x.Scope +func isRouteNotFoundError(err error) bool { + return strings.Contains(err.Error(), "no such process") } diff --git a/go-controller/pkg/node/routemanager/route_manager_test.go b/go-controller/pkg/node/routemanager/route_manager_test.go index d3fe0efeeb..d090a6dbe2 100644 --- a/go-controller/pkg/node/routemanager/route_manager_test.go +++ b/go-controller/pkg/node/routemanager/route_manager_test.go @@ -2,6 +2,7 @@ package routemanager import ( "net" + "reflect" "runtime" "sync" "time" @@ -14,10 +15,14 @@ import ( "golang.org/x/sys/unix" utilsnet "k8s.io/utils/net" + "k8s.io/utils/ptr" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" ) +// mainTableID is the default routing table. IPRoute2 names the default routing table as 'main' +const mainTableID = 254 + var _ = ginkgo.Describe("Route Manager", func() { defer ginkgo.GinkgoRecover() var rm *Controller @@ -112,54 +117,54 @@ var _ = ginkgo.Describe("Route Manager", func() { }) ginkgo.It("applies route with subnet, gateway IP, src IP, MTU", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Gw: loGWIP, MTU: loMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Gw: loGWIP, MTU: loMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("applies route with subnets, gateway IP, src IP", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("applies route with subnets, gateway IP", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("applies route with subnets", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return 
isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("route exists, has different mtu and is updated", func() { - route := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + route := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRoute(testNS, route)).Should(gomega.Succeed()) - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loAlternativeMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loAlternativeMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("route exists, has different src and is updated", func() { - route := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + route := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRoute(testNS, route)).Should(gomega.Succeed()) - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Src: loIPDiff, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, Src: loIPDiff, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) @@ -188,75 +193,75 @@ var _ = ginkgo.Describe("Route Manager", func() { ginkgo.Context("del route", func() { ginkgo.It("del route with dst", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(delRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeFalse()) }) ginkgo.It("del route with dst and gateway", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Gw: loGWIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Gw: loGWIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return 
isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(delRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeFalse()) }) ginkgo.It("del route with dst, gateway and MTU", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Gw: loGWIP, MTU: loMTU, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Gw: loGWIP, MTU: loMTU, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(delRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeFalse()) }) ginkgo.It("del route amongst multiple managed routes present", func() { - rAlt := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: MainTableID, Type: unix.RTN_UNICAST} + rAlt := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, rAlt)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) - rDefault := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: MainTableID, Type: unix.RTN_UNICAST} + rDefault := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, rDefault)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRoutesInTable(testNS, []netlink.Route{rDefault, rAlt}, loLink.Attrs().Index, MainTableID) + return isRoutesInTable(testNS, []netlink.Route{rDefault, rAlt}, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(delRouteViaManager(rm, testNS, rAlt)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeFalse()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, rDefault, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, rDefault, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("del route and ignores unmanaged route", func() { - rAlt := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: MainTableID, Type: unix.RTN_UNICAST} + rAlt := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: altSubnet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRoute(testNS, rAlt)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, rAlt, 
loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) - rDefault := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: MainTableID, Type: unix.RTN_UNICAST} + rDefault := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, rDefault)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRoutesInTable(testNS, []netlink.Route{rDefault, rAlt}, loLink.Attrs().Index, MainTableID) + return isRoutesInTable(testNS, []netlink.Route{rDefault, rAlt}, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(delRouteViaManager(rm, testNS, rDefault)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, rAlt, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) @@ -271,10 +276,10 @@ var _ = ginkgo.Describe("Route Manager", func() { ginkgo.Context("runtime sync", func() { ginkgo.It("reapplies managed route that was removed (gw IP, mtu, src IP)", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Gw: loGWIP, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) // clear routes and wait for sync to reapply routeList, err := getRouteList(testNS, loLink, netlink.FAMILY_ALL) @@ -283,15 +288,15 @@ var _ = ginkgo.Describe("Route Manager", func() { gomega.Expect(deleteRoutes(testNS, routeList...)).ShouldNot(gomega.HaveOccurred()) // wait for sync to activate since managed routes have been deleted gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("reapplies managed route that was removed (mtu, src IP)", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) // clear routes and wait for sync to reapply routeList, err := getRouteList(testNS, loLink, netlink.FAMILY_ALL) @@ -300,23 +305,23 @@ var _ = ginkgo.Describe("Route Manager", func() { gomega.Expect(deleteRoutes(testNS, routeList...)).ShouldNot(gomega.HaveOccurred()) // wait for sync to activate since managed routes have been deleted gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) ginkgo.It("reapplies managed route that was 
removed because link is down", func() { - r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: loLink.Attrs().Index, Dst: loSubnet, MTU: loMTU, Src: loIP, Table: mainTableID, Type: unix.RTN_UNICAST} gomega.Expect(addRouteViaManager(rm, testNS, r)).Should(gomega.Succeed()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) gomega.Expect(setLinkDown(testNS, loLink)).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeFalse()) gomega.Expect(setLinkUp(testNS, loLink)).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(func() bool { - return isRouteInTable(testNS, r, loLink.Attrs().Index, MainTableID) + return isRouteInTable(testNS, r, loLink.Attrs().Index, mainTableID) }, time.Second).Should(gomega.BeTrue()) }) @@ -340,7 +345,7 @@ var _ = ginkgo.Describe("Route Manager", func() { if err = netlink.LinkSetUp(link); err != nil { return err } - r := netlink.Route{LinkIndex: link.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: MainTableID, Type: unix.RTN_UNICAST} + r := netlink.Route{LinkIndex: link.Attrs().Index, Dst: v4DefaultRouteIPNet, Table: mainTableID, Type: unix.RTN_UNICAST} if err = rm.Add(r); err != nil { return err } @@ -351,6 +356,70 @@ var _ = ginkgo.Describe("Route Manager", func() { }) }) +var _ = ginkgo.Describe("Route Manager", func() { + ginkgo.It("partially compares expected routes with installed routes", func() { + values := map[string]any{ + "int": 1, + "Scope": uint8(1), + "IPNet": ovntest.MustParseIPNet("10.0.0.0/16"), + "IP": ovntest.MustParseIP("10.0.0.0"), + "NexthopInfo": []*netlink.NexthopInfo{{LinkIndex: 1}}, + "RouteProtocol": 1, + "*int": ptr.To(1), + "Destination": &netlink.Via{Addr: ovntest.MustParseIP("10.0.0.0")}, + "Encap": &netlink.IP6tnlEncap{Src: ovntest.MustParseIP("10.0.0.0")}, + "string": "test", + } + keys := map[string]bool{ + "Dst": true, + "Priority": true, + "Table": true, + } + + var getName func(reflect.Type, string) string + getName = func(t reflect.Type, prefix string) string { + name := prefix + t.Name() + _, known := values[name] + if known { + return name + } + kind := t.Kind() + switch kind { + case reflect.Pointer: + return getName(t.Elem(), "*") + case reflect.Slice: + return getName(t.Elem(), "[]") + default: + return t.Name() + } + } + + // we iterate all the fields of a Route and test that: + // - correctly detects differences of non zero left values against zero right values + // - correctly detects differences of zero left values against non zero right values if key + // - correctly ignores differences of zero left values against non zero right values if not key + var z netlink.Route + zv := reflect.ValueOf(z) + for i := 0; i < zv.NumField(); i++ { + var t netlink.Route + tv := reflect.ValueOf(&t).Elem() + fv := tv.Field(i) + ft := fv.Type() + fn := tv.Type().Field(i).Name + + ftn := getName(ft, "") + + gomega.Expect(values).To(gomega.HaveKey(ftn), "unexpected field %q of type %s", fn, ftn) + + fv.Set(reflect.ValueOf(values[ftn]).Convert(ft)) + + isKey := keys[fn] + gomega.Expect(routePartiallyEqualWantedToExisting(&t, &z)).To(gomega.BeFalse(), "differences of non zero left values 
against zero right values not detected for field %s", fn) + gomega.Expect(routePartiallyEqualWantedToExisting(&z, &t)).ToNot(gomega.Equal(isKey), "differences of zero left values against non zero right values not ignored (or detected if field is key) for field %s", fn) + } + }) +}) + func addRouteViaManager(rm *Controller, targetNS ns.NetNS, r netlink.Route) error { return targetNS.Do(func(ns.NetNS) error { return rm.Add(r) }) } @@ -369,6 +438,14 @@ func isRouteInTable(targetNs ns.NetNS, expectedRoute netlink.Route, linkIndex, t return isRoutesInTable(targetNs, []netlink.Route{expectedRoute}, linkIndex, table) } +func filterRouteByTable(linkIndex, table int) (*netlink.Route, uint64) { + return &netlink.Route{ + LinkIndex: linkIndex, + Table: table, + }, + netlink.RT_FILTER_OIF | netlink.RT_FILTER_TABLE +} + // isRoutesInTable ensures only the slice of expected routes for a link are present within a table func isRoutesInTable(targetNs ns.NetNS, expectedRoutes []netlink.Route, linkIndex, table int) bool { if len(expectedRoutes) == 0 { @@ -391,7 +468,7 @@ func isRoutesInTable(targetNs ns.NetNS, expectedRoutes []netlink.Route, linkInde for _, expectedRoute := range expectedRoutes { found = false for _, existingRoute := range existingRoutes { - if RoutePartiallyEqual(existingRoute, expectedRoute) { + if routePartiallyEqualWantedToExisting(&expectedRoute, &existingRoute) { found = true break } diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index e5d1c21024..1a3f8685e4 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -5,9 +5,12 @@ import ( "fmt" "net" "reflect" + "strconv" "sync" "time" + nadinformerv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1" + corev1 "k8s.io/api/core/v1" knet "k8s.io/api/networking/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -36,6 +39,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/observability" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + nqoscontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/network_qos" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/routeimport" zoneic "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/zone_interconnect" @@ -181,6 +185,9 @@ type BaseNetworkController struct { observManager *observability.Manager routeImportManager routeimport.Manager + + // Controller used for programming OVN for Network QoS + nqosController *nqoscontroller.Controller } func (oc *BaseNetworkController) reconcile(netInfo util.NetInfo, setNodeFailed func(string)) error { @@ -379,10 +386,19 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h gwIfAddr := util.GetNodeGatewayIfAddr(hostSubnet) lrpNetworks = append(lrpNetworks, gwIfAddr.String()) } + + var lrpOptions map[string]string + enableGatewayMTU := util.ParseNodeGatewayMTUSupport(node) + if enableGatewayMTU { + lrpOptions = map[string]string{ + "gateway_mtu": strconv.Itoa(config.Default.MTU), + } + } logicalRouterPort := nbdb.LogicalRouterPort{ Name: lrpName, MAC: nodeLRPMAC.String(), Networks: lrpNetworks, + Options: lrpOptions, } logicalRouter := nbdb.LogicalRouter{Name: logicalRouterName} gatewayChassis := 
nbdb.GatewayChassis{ @@ -392,7 +408,7 @@ func (bnc *BaseNetworkController) syncNodeClusterRouterPort(node *corev1.Node, h } err = libovsdbops.CreateOrUpdateLogicalRouterPort(bnc.nbClient, &logicalRouter, &logicalRouterPort, - &gatewayChassis, &logicalRouterPort.MAC, &logicalRouterPort.Networks) + &gatewayChassis, &logicalRouterPort.MAC, &logicalRouterPort.Networks, &logicalRouterPort.Options) if err != nil { klog.Errorf("Failed to add gateway chassis %s to logical router port %s, error: %v", chassisID, lrpName, err) return err @@ -1056,6 +1072,31 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj return nil } +func (bnc *BaseNetworkController) newNetworkQoSController() error { + var err error + var nadInformer nadinformerv1.NetworkAttachmentDefinitionInformer + + if config.OVNKubernetesFeature.EnableMultiNetwork { + nadInformer = bnc.watchFactory.NADInformer() + } + bnc.nqosController, err = nqoscontroller.NewController( + bnc.controllerName, + bnc.ReconcilableNetInfo.GetNetInfo(), + bnc.nbClient, + bnc.recorder, + bnc.kube.NetworkQoSClient, + bnc.watchFactory.NetworkQoSInformer(), + bnc.watchFactory.NamespaceCoreInformer(), + bnc.watchFactory.PodCoreInformer(), + bnc.watchFactory.NodeCoreInformer(), + nadInformer, + bnc.addressSetFactory, + bnc.isPodScheduledinLocalZone, + bnc.zone, + ) + return err +} + func initLoadBalancerGroups(nbClient libovsdbclient.Client, netInfo util.NetInfo) ( clusterLoadBalancerGroupUUID, switchLoadBalancerGroupUUID, routerLoadBalancerGroupUUID string, err error) { diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 872a03780c..95ef04d1af 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -73,6 +73,22 @@ func (oc *BaseSecondaryLayer2NetworkController) cleanup() error { return err } + ops, err = libovsdbops.DeleteQoSesWithPredicateOps(oc.nbClient, ops, + func(item *nbdb.QoS) bool { + return item.ExternalIDs[types.NetworkExternalID] == netName + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting QoSes of network %s: %v", netName, err) + } + + ops, err = libovsdbops.DeleteAddressSetsWithPredicateOps(oc.nbClient, ops, + func(item *nbdb.AddressSet) bool { + return item.ExternalIDs[types.NetworkExternalID] == netName + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting address sets of network %s: %v", netName, err) + } + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) if err != nil { return fmt.Errorf("failed to deleting switches of network %s: %v", netName, err) @@ -121,6 +137,20 @@ func (oc *BaseSecondaryLayer2NetworkController) run() error { } } + // start NetworkQoS controller if feature is enabled + if config.OVNKubernetesFeature.EnableNetworkQoS { + err := oc.newNetworkQoSController() + if err != nil { + return fmt.Errorf("unable to create network qos controller, err: %w", err) + } + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + // Until we have scale issues in future let's spawn only one thread + oc.nqosController.Run(1, oc.stopChan) + }() + } + // Add ourselves to the route import manager if oc.routeImportManager != nil && config.Gateway.Mode == config.GatewayModeShared { err := oc.routeImportManager.AddNetwork(oc.GetNetInfo()) diff --git a/go-controller/pkg/ovn/controller/network_qos/metrics.go b/go-controller/pkg/ovn/controller/network_qos/metrics.go new file mode 
100644 index 0000000000..96fa30834d --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/metrics.go @@ -0,0 +1,119 @@ +package networkqos + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" +) + +// Metrics to be exposed +var ( + nqosCount = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "num_network_qoses", + Help: "The total number of network qoses in the cluster", + }, + []string{"network"}, + ) + + nqosOvnOperationDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "nqos_ovn_operation_duration_ms", + Help: "Time spent on reconciling a NetworkQoS event", + Buckets: prometheus.ExponentialBuckets(.1, 2, 15), + }, + []string{"operation"}, + ) + + nqosReconcileDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "nqos_creation_duration_ms", + Help: "Time spent on reconciling a NetworkQoS event", + Buckets: prometheus.ExponentialBuckets(.1, 2, 15), + }, + []string{"network"}, + ) + + nqosPodReconcileDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "nqos_deletion_duration_ms", + Help: "Time spent on reconciling a Pod event", + Buckets: prometheus.ExponentialBuckets(.1, 2, 15), + }, + []string{"network"}, + ) + + nqosNamespaceReconcileDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "nqos_ns_reconcile_duration_ms", + Help: "Time spent on reconciling Namespace change for all Pods related to NetworkQoSes", + Buckets: prometheus.ExponentialBuckets(.1, 2, 15), + }, + []string{"network"}, + ) + + nqosStatusPatchDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metrics.MetricOvnkubeNamespace, + Subsystem: metrics.MetricOvnkubeSubsystemController, + Name: "nqos_status_patch_duration_ms", + Help: "Time spent on patching the status of a NetworkQoS", + }, + []string{"network"}, + ) +) + +func init() { + prometheus.MustRegister( + nqosCount, + nqosOvnOperationDuration, + nqosReconcileDuration, + nqosPodReconcileDuration, + nqosNamespaceReconcileDuration, + nqosStatusPatchDuration, + ) +} + +func (c *Controller) teardownMetricsCollector() { + prometheus.Unregister(nqosCount) +} + +// records the number of networkqos. 
+func updateNetworkQoSCount(network string, count int) { + nqosCount.WithLabelValues(network).Set(float64(count)) +} + +// records the reconciliation duration for networkqos +func recordNetworkQoSReconcileDuration(network string, duration int64) { + nqosReconcileDuration.WithLabelValues(network).Observe(float64(duration)) +} + +// records time spent on adding/removing a pod to/from networkqos rules +func recordPodReconcileDuration(network string, duration int64) { + nqosPodReconcileDuration.WithLabelValues(network).Observe(float64(duration)) +} + +// records time spent on handling a namespace event which is involved in networkqos +func recordNamespaceReconcileDuration(network string, duration int64) { + nqosNamespaceReconcileDuration.WithLabelValues(network).Observe(float64(duration)) +} + +// records time spent on an ovn operation +func recordOvnOperationDuration(operationType string, duration int64) { + nqosOvnOperationDuration.WithLabelValues(operationType).Observe(float64(duration)) +} + +// records time spent on patching the status of a NetworkQoS +func recordStatusPatchDuration(network string, duration int64) { + nqosStatusPatchDuration.WithLabelValues(network).Observe(float64(duration)) +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos.go b/go-controller/pkg/ovn/controller/network_qos/network_qos.go new file mode 100644 index 0000000000..6f8f247541 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos.go @@ -0,0 +1,468 @@ +package networkqos + +import ( + "context" + "fmt" + "sync" + "time" + + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + nqosapiapply "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/applyconfiguration/networkqos/v1alpha1" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +func (c *Controller) processNextNQOSWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + nqosKey, quit := c.nqosQueue.Get() + if quit { + return false + } + defer c.nqosQueue.Done(nqosKey) + + if err := c.syncNetworkQoS(nqosKey); err != nil { + if c.nqosQueue.NumRequeues(nqosKey) < maxRetries { + c.nqosQueue.AddRateLimited(nqosKey) + return true + } + klog.Warningf("%s: Failed to reconcile NetworkQoS %s: %v", c.controllerName, nqosKey, err) + utilruntime.HandleError(fmt.Errorf("failed to reconcile NetworkQoS %s: %v", nqosKey, err)) + } + c.nqosQueue.Forget(nqosKey) + return true +} + +// syncNetworkQoS decides the main logic everytime +// we dequeue a key from the nqosQueue cache +func (c *Controller) syncNetworkQoS(key string) error { + nqosNamespace, nqosName, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + startTime := time.Now() + c.nqosCache.LockKey(key) + defer func() { + c.nqosCache.UnlockKey(key) + klog.V(5).Infof("%s - 
Finished reconciling NetworkQoS %s : %v", c.controllerName, key, time.Since(startTime)) + }() + klog.V(5).Infof("%s - reconciling NetworkQoS %s", c.controllerName, key) + nqos, err := c.nqosLister.NetworkQoSes(nqosNamespace).Get(nqosName) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if nqos == nil || !nqos.DeletionTimestamp.IsZero() { + klog.V(6).Infof("%s - NetworkQoS %s is being deleted.", c.controllerName, key) + return c.clearNetworkQos(nqosNamespace, nqosName) + } + + if networkManagedByMe, err := c.networkManagedByMe(nqos.Spec.NetworkSelectors); err != nil { + return err + } else if !networkManagedByMe { + // maybe NetworkAttachmentName has been changed from this one to other value, try cleanup anyway + return c.clearNetworkQos(nqos.Namespace, nqos.Name) + } + + klog.V(5).Infof("%s - Processing NetworkQoS %s/%s", c.controllerName, nqos.Namespace, nqos.Name) + if err := c.ensureNetworkQos(nqos); err != nil { + c.nqosCache.Delete(key) + // we can ignore the error if status update doesn't succeed; best effort + c.updateNQOSStatusToNotReady(nqos.Namespace, nqos.Name, "failed to reconcile", err) + return err + } + recordNetworkQoSReconcileDuration(c.controllerName, time.Since(startTime).Milliseconds()) + updateNetworkQoSCount(c.controllerName, len(c.nqosCache.GetKeys())) + return nil +} + +// ensureNetworkQos will handle the main reconcile logic for any given nqos's +// add/update that might be triggered either due to NQOS changes or the corresponding +// matching pod or namespace changes. +// This function need to be called with a lock held. +func (c *Controller) ensureNetworkQos(nqos *networkqosapi.NetworkQoS) error { + desiredNQOSState := &networkQoSState{ + name: nqos.Name, + namespace: nqos.Namespace, + } + + if len(nqos.Spec.PodSelector.MatchLabels) > 0 || len(nqos.Spec.PodSelector.MatchExpressions) > 0 { + if podSelector, err := metav1.LabelSelectorAsSelector(&nqos.Spec.PodSelector); err != nil { + c.updateNQOSStatusToNotReady(nqos.Namespace, nqos.Name, "failed to parse source pod selector", err) + return nil + } else { + desiredNQOSState.PodSelector = podSelector + } + } + + // set EgressRules to desiredNQOSState + rules := []*GressRule{} + for index, ruleSpec := range nqos.Spec.Egress { + bwRate := int(ruleSpec.Bandwidth.Rate) + bwBurst := int(ruleSpec.Bandwidth.Burst) + ruleState := &GressRule{ + Priority: getQoSRulePriority(nqos.Spec.Priority, index), + Dscp: ruleSpec.DSCP, + } + if bwRate > 0 { + ruleState.Rate = &bwRate + } + if bwBurst > 0 { + ruleState.Burst = &bwBurst + } + destStates := []*Destination{} + for _, destSpec := range ruleSpec.Classifier.To { + if destSpec.IPBlock != nil && (destSpec.PodSelector != nil || destSpec.NamespaceSelector != nil) { + return fmt.Errorf("specifying both ipBlock and podSelector/namespaceSelector is not allowed") + } + destState := &Destination{} + destState.IpBlock = destSpec.IPBlock.DeepCopy() + if destSpec.NamespaceSelector != nil && (len(destSpec.NamespaceSelector.MatchLabels) > 0 || len(destSpec.NamespaceSelector.MatchExpressions) > 0) { + if selector, err := metav1.LabelSelectorAsSelector(destSpec.NamespaceSelector); err != nil { + return fmt.Errorf("error parsing destination namespace selector: %v", err) + } else { + destState.NamespaceSelector = selector + } + } + if destSpec.PodSelector != nil && (len(destSpec.PodSelector.MatchLabels) > 0 || len(destSpec.PodSelector.MatchExpressions) > 0) { + if selector, err := metav1.LabelSelectorAsSelector(destSpec.PodSelector); err != nil { + return fmt.Errorf("error 
parsing destination pod selector: %v", err) + } else { + destState.PodSelector = selector + } + } + destStates = append(destStates, destState) + } + ruleState.Classifier = &Classifier{ + Destinations: destStates, + } + ruleState.Classifier.Ports = ruleSpec.Classifier.Ports + rules = append(rules, ruleState) + } + desiredNQOSState.EgressRules = rules + if err := desiredNQOSState.initAddressSets(c.addressSetFactory, c.controllerName); err != nil { + return err + } + if err := c.resyncPods(desiredNQOSState); err != nil { + return fmt.Errorf("failed to resync pods: %w", err) + } + // delete stale rules left from previous NetworkQoS definition, along with the address sets + if err := c.cleanupStaleOvnObjects(desiredNQOSState); err != nil { + return fmt.Errorf("failed to delete stale QoSes: %w", err) + } + c.nqosCache.Store(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name), desiredNQOSState) + if e := c.updateNQOSStatusToReady(nqos.Namespace, nqos.Name); e != nil { + return fmt.Errorf("successfully reconciled NetworkQoS %s/%s, but failed to patch status: %v", nqos.Namespace, nqos.Name, e) + } + return nil +} + +// clearNetworkQos will handle the logic for deleting all db objects related +// to the provided nqos which got deleted. it looks up object in OVN by comparing +// the nqos name with the metadata in externalIDs. +// this function need to be called with a lock held. +func (c *Controller) clearNetworkQos(nqosNamespace, nqosName string) error { + k8sFullName := joinMetaNamespaceAndName(nqosNamespace, nqosName) + ovnObjectName := joinMetaNamespaceAndName(nqosNamespace, nqosName, ":") + + klog.V(4).Infof("%s - try cleaning up networkqos %s", c.controllerName, k8sFullName) + // remove NBDB objects by NetworkQoS name + if err := c.deleteByName(ovnObjectName); err != nil { + return fmt.Errorf("failed to delete QoS rules for NetworkQoS %s: %w", k8sFullName, err) + } + c.nqosCache.Delete(k8sFullName) + updateNetworkQoSCount(c.controllerName, len(c.nqosCache.GetKeys())) + return nil +} + +const ( + conditionTypeReady = "Ready-In-Zone-" + reasonQoSSetupSuccess = "Success" + reasonQoSSetupFailed = "Failed" +) + +func (c *Controller) updateNQOSStatusToReady(namespace, name string) error { + cond := metav1.Condition{ + Type: conditionTypeReady + c.zone, + Status: metav1.ConditionTrue, + Reason: reasonQoSSetupSuccess, + Message: "NetworkQoS was applied successfully", + } + startTime := time.Now() + err := c.updateNQOStatusCondition(cond, namespace, name) + if err != nil { + return fmt.Errorf("failed to update the status of NetworkQoS %s/%s, err: %v", namespace, name, err) + } + klog.V(5).Infof("%s: successfully patched the status of NetworkQoS %s/%s with condition type %v/%v in %v seconds", + c.controllerName, namespace, name, conditionTypeReady+c.zone, metav1.ConditionTrue, time.Since(startTime).Seconds()) + recordStatusPatchDuration(c.controllerName, time.Since(startTime).Milliseconds()) + return nil +} + +func (c *Controller) updateNQOSStatusToNotReady(namespace, name, reason string, err error) { + msg := reason + if err != nil { + msg = fmt.Sprintf("NetworkQoS %s/%s - %s, error details: %v", namespace, name, reason, err) + } + cond := metav1.Condition{ + Type: conditionTypeReady + c.zone, + Status: metav1.ConditionFalse, + Reason: reasonQoSSetupFailed, + Message: msg, + } + klog.Error(msg) + startTime := time.Now() + err = c.updateNQOStatusCondition(cond, namespace, name) + if err != nil { + klog.Warningf("%s: failed to update the status of NetworkQoS %s/%s, err: %v", c.controllerName, namespace, 
name, err) + } else { + klog.V(6).Infof("%s: successfully patched status of NetworkQoS %s/%s with condition type %v/%v in %v seconds", c.controllerName, namespace, name, conditionTypeReady+c.zone, metav1.ConditionTrue, time.Since(startTime).Seconds()) + recordStatusPatchDuration(c.controllerName, time.Since(startTime).Milliseconds()) + } +} + +func (c *Controller) updateNQOStatusCondition(newCondition metav1.Condition, namespace, name string) error { + nqos, err := c.nqosLister.NetworkQoSes(namespace).Get(name) + if err != nil { + if apierrors.IsNotFound(err) { + // Resource was deleted, log it + klog.V(5).Infof("NetworkQoS %s/%s updating status but not found, ignoring", namespace, name) + return nil + } + return err + } + + existingCondition := meta.FindStatusCondition(nqos.Status.Conditions, newCondition.Type) + newConditionApply := &metaapplyv1.ConditionApplyConfiguration{ + Type: &newCondition.Type, + Status: &newCondition.Status, + ObservedGeneration: &newCondition.ObservedGeneration, + Reason: &newCondition.Reason, + Message: &newCondition.Message, + } + + if existingCondition == nil || existingCondition.Status != newCondition.Status { + newConditionApply.LastTransitionTime = ptr.To(metav1.NewTime(time.Now())) + } else { + newConditionApply.LastTransitionTime = &existingCondition.LastTransitionTime + } + + applyObj := nqosapiapply.NetworkQoS(name, namespace). + WithStatus(nqosapiapply.Status().WithConditions(newConditionApply)) + _, err = c.nqosClientSet.K8sV1alpha1().NetworkQoSes(namespace).ApplyStatus(context.TODO(), applyObj, metav1.ApplyOptions{FieldManager: c.zone, Force: true}) + return err +} + +func (c *Controller) resyncPods(nqosState *networkQoSState) error { + pods, err := c.nqosPodLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list pods in namespace %s: %w", nqosState.namespace, err) + } + nsCache := make(map[string]*corev1.Namespace) + addressSetMap := map[string]sets.Set[string]{} + for _, pod := range pods { + if pod.Spec.HostNetwork || pod.DeletionTimestamp != nil { + continue + } + ns := nsCache[pod.Namespace] + if ns == nil { + ns, err = c.nqosNamespaceLister.Get(pod.Namespace) + if err != nil { + if apierrors.IsNotFound(err) { + klog.Warningf("Namespace %s not found, skipping pod %s/%s", pod.Namespace, pod.Namespace, pod.Name) + continue + } + return fmt.Errorf("failed to get namespace %s: %w", pod.Namespace, err) + } + nsCache[pod.Namespace] = ns + } + if ns.DeletionTimestamp != nil { + continue + } + if err := c.setPodForNQOS(pod, nqosState, ns, addressSetMap); err != nil { + return err + } + } + return nqosState.cleanupStaleAddresses(addressSetMap) +} + +var cudnController = udnv1.SchemeGroupVersion.WithKind("ClusterUserDefinedNetwork") + +// networkManagedByMe determines if any of the networks specified in the networkSelectors are managed by this controller. 
+// It returns true if: +// - Multi-network is disabled (nadLister is nil) and this is the default network controller +// - No selectors are provided and this is the default network controller +// - Any of the selected networks match one of these criteria: +// - The selector is for the default network and this is the default network controller +// - The selector is for cluster user defined networks (CUDNs) and any of the matching NADs are controlled by a CUDN +// - The selector is for network attachment definitions (NADs) and any of the matching NADs are managed by this controller +// +// Returns an error if: +// - Any of the network selectors are invalid or empty +// - There is an error listing network attachment definitions +func (c *Controller) networkManagedByMe(networkSelectors crdtypes.NetworkSelectors) (bool, error) { + // return c.IsDefault() if multi-network is disabled or no selectors is provided in spec + if c.nadLister == nil || len(networkSelectors) == 0 { + return c.IsDefault(), nil + } + var selectedNads []*nadv1.NetworkAttachmentDefinition + var err error + for _, networkSelector := range networkSelectors { + switch networkSelector.NetworkSelectionType { + case crdtypes.DefaultNetwork: + return c.IsDefault(), nil + case crdtypes.PrimaryUserDefinedNetworks: + if !c.IsPrimaryNetwork() { + return false, nil + } + if networkSelector.PrimaryUserDefinedNetworkSelector == nil { + return false, fmt.Errorf("empty primary user defined network selector") + } + selectedNads, err = c.getNetAttachDefsByNamespace(&networkSelector.PrimaryUserDefinedNetworkSelector.NamespaceSelector) + if err != nil { + return false, err + } + case crdtypes.SecondaryUserDefinedNetworks: + if !c.IsSecondary() { + return false, nil + } + if networkSelector.SecondaryUserDefinedNetworkSelector == nil { + return false, fmt.Errorf("empty secondary user defined network selector") + } + selectedNads, err = c.getNetAttachDefsBySelectors(&networkSelector.SecondaryUserDefinedNetworkSelector.NamespaceSelector, &networkSelector.SecondaryUserDefinedNetworkSelector.NetworkSelector) + if err != nil { + return false, err + } + case crdtypes.ClusterUserDefinedNetworks: + if networkSelector.ClusterUserDefinedNetworkSelector == nil { + return false, fmt.Errorf("empty cluster user defined network selector") + } + nads, err := c.getNetAttachDefsBySelectors(nil, &networkSelector.ClusterUserDefinedNetworkSelector.NetworkSelector) + if err != nil { + return false, err + } + for _, nad := range nads { + // check this NAD is controlled by a CUDN + controller := metav1.GetControllerOfNoCopy(nad) + isCUDN := controller != nil && controller.Kind == cudnController.Kind && controller.APIVersion == cudnController.GroupVersion().String() + if !isCUDN { + continue + } + selectedNads = append(selectedNads, nad) + } + case crdtypes.NetworkAttachmentDefinitions: + if networkSelector.NetworkAttachmentDefinitionSelector == nil { + return false, fmt.Errorf("empty network attachment definition selector") + } + selectedNads, err = c.getNetAttachDefsBySelectors(&networkSelector.NetworkAttachmentDefinitionSelector.NamespaceSelector, &networkSelector.NetworkAttachmentDefinitionSelector.NetworkSelector) + if err != nil { + return false, err + } + default: + return false, fmt.Errorf("unsupported network selection type %s", networkSelector.NetworkSelectionType) + } + } + if len(selectedNads) == 0 { + return false, nil + } + for _, nad := range selectedNads { + nadKey := joinMetaNamespaceAndName(nad.Namespace, nad.Name) + if ((nadKey == 
types.DefaultNetworkName) && c.IsDefault()) || + (!c.IsDefault() && c.HasNAD(nadKey)) { + return true, nil + } + } + return false, nil +} + +func (c *Controller) getLogicalSwitchName(nodeName string) string { + switch { + case c.TopologyType() == types.Layer2Topology: + return c.GetNetworkScopedSwitchName(types.OVNLayer2Switch) + case c.TopologyType() == types.LocalnetTopology: + return c.GetNetworkScopedSwitchName(types.OVNLocalnetSwitch) + case !c.IsSecondary() || c.TopologyType() == types.Layer3Topology: + return c.GetNetworkScopedSwitchName(nodeName) + default: + return "" + } +} + +func (c *Controller) getAllNetworkQoSes() ([]*networkqosapi.NetworkQoS, error) { + nqoses, err := c.nqosLister.List(labels.Everything()) + if err != nil { + return nil, fmt.Errorf("failed to list NetworkQoS: %v", err) + } + return nqoses, nil +} + +func (c *Controller) getNetAttachDefsByNamespace(namespaceSelector *metav1.LabelSelector) ([]*nadv1.NetworkAttachmentDefinition, error) { + var selectedNads []*nadv1.NetworkAttachmentDefinition + if namespaceSelector != nil && namespaceSelector.Size() > 0 { + nsSelector, err := metav1.LabelSelectorAsSelector(namespaceSelector) + if err != nil { + return nil, fmt.Errorf("invalid namespace selector %v: %v", namespaceSelector.String(), err) + } + namespaces, err := c.nqosNamespaceLister.List(nsSelector) + if err != nil { + return nil, fmt.Errorf("failed to list namespaces: %v", err) + } + for _, ns := range namespaces { + nads, err := c.nadLister.NetworkAttachmentDefinitions(ns.Name).List(labels.Everything()) + if err != nil { + return nil, fmt.Errorf("failed to list NADs in namespace %s: %v", ns.Name, err) + } + selectedNads = append(selectedNads, nads...) + } + } + return selectedNads, nil +} + +func (c *Controller) getNetAttachDefsBySelectors(namespaceSelector, nadSelector *metav1.LabelSelector) ([]*nadv1.NetworkAttachmentDefinition, error) { + if nadSelector == nil || nadSelector.Size() == 0 { + return nil, fmt.Errorf("empty network selector") + } + nadSel, err := metav1.LabelSelectorAsSelector(nadSelector) + if err != nil { + return nil, fmt.Errorf("invalid network selector %v: %v", nadSelector.String(), err) + } + var selectedNads []*nadv1.NetworkAttachmentDefinition + if namespaceSelector != nil && namespaceSelector.Size() > 0 { + nsSelector, err := metav1.LabelSelectorAsSelector(namespaceSelector) + if err != nil { + return nil, fmt.Errorf("invalid namespace selector %v: %v", namespaceSelector.String(), err) + } + namespaces, err := c.nqosNamespaceLister.List(nsSelector) + if err != nil { + return nil, fmt.Errorf("failed to list namespaces: %v", err) + } + for _, ns := range namespaces { + nads, err := c.nadLister.NetworkAttachmentDefinitions(ns.Name).List(nadSel) + if err != nil { + return nil, fmt.Errorf("failed to list NADs in namespace %s: %v", ns.Name, err) + } + selectedNads = append(selectedNads, nads...) + } + } else { + nads, err := c.nadLister.List(nadSel) + if err != nil { + return nil, fmt.Errorf("failed to list NADs: %v", err) + } + selectedNads = append(selectedNads, nads...) 
+ } + return selectedNads, nil +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_controller.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_controller.go new file mode 100644 index 0000000000..15511e35d8 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_controller.go @@ -0,0 +1,553 @@ +package networkqos + +import ( + "fmt" + "reflect" + "sync" + "time" + + nadinformerv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1" + nadlisterv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + corev1informers "k8s.io/client-go/informers/core/v1" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + networkqosinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/informers/externalversions/networkqos/v1alpha1" + networkqoslister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/listers/networkqos/v1alpha1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +const ( + // maxRetries is the number of times a object will be retried before it is dropped out of the queue. + // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the + // sequence of delays between successive queuings of an object. + // + // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s + maxRetries = 15 +) + +// Controller holds the fields required for NQOS controller +// taken from k8s controller guidelines +type Controller struct { + // name of the controller that starts the NQOS controller + // (values are default-network-controller, secondary-network-controller etc..) + controllerName string + util.NetInfo + nqosClientSet networkqosclientset.Interface + + // libovsdb northbound client interface + nbClient libovsdbclient.Client + eventRecorder record.EventRecorder + // An address set factory that creates address sets + addressSetFactory addressset.AddressSetFactory + // pass in the isPodScheduledinLocalZone util from bnc - used only to determine + // what zones the pods are in. + // isPodScheduledinLocalZone returns whether the provided pod is in a zone local to the zone controller + // So if pod is not scheduled yet it is considered remote. 
Also if we can't fetch the node from kapi and determine the zone, + we consider it remote - this is ok for this controller as this variable is only used to + determine if we need to add pod's port to port group or not - future updates should + take care of reconciling the state of the cluster + isPodScheduledinLocalZone func(*corev1.Pod) bool + // stores the name of the zone that this controller belongs to + zone string + + // namespace+name -> cloned value of NetworkQoS + nqosCache *syncmap.SyncMap[*networkQoSState] + + // queues for the CRDs where incoming work is placed to de-dup + nqosQueue workqueue.TypedRateLimitingInterface[string] + // cached access to nqos objects + nqosLister networkqoslister.NetworkQoSLister + nqosCacheSynced cache.InformerSynced + // namespace queue, cache, lister + nqosNamespaceLister corev1listers.NamespaceLister + nqosNamespaceSynced cache.InformerSynced + nqosNamespaceQueue workqueue.TypedRateLimitingInterface[*eventData[*corev1.Namespace]] + // pod queue, cache, lister + nqosPodLister corev1listers.PodLister + nqosPodSynced cache.InformerSynced + nqosPodQueue workqueue.TypedRateLimitingInterface[*eventData[*corev1.Pod]] + // node queue, cache, lister + nqosNodeLister corev1listers.NodeLister + nqosNodeSynced cache.InformerSynced + nqosNodeQueue workqueue.TypedRateLimitingInterface[string] + + // nad lister, only valid for default network controller when multi-network is enabled + nadLister nadlisterv1.NetworkAttachmentDefinitionLister + nadSynced cache.InformerSynced +} + +type eventData[T metav1.Object] struct { + old T + new T +} + +func newEventData[T metav1.Object](old T, new T) *eventData[T] { + return &eventData[T]{ + old: old, + new: new, + } +} + +func (e *eventData[T]) name() string { + if !reflect.ValueOf(e.old).IsNil() { + return e.old.GetName() + } else if !reflect.ValueOf(e.new).IsNil() { + return e.new.GetName() + } + return "" +} + +func (e *eventData[T]) namespace() string { + if !reflect.ValueOf(e.old).IsNil() { + return e.old.GetNamespace() + } else if !reflect.ValueOf(e.new).IsNil() { + return e.new.GetNamespace() + } + return "" +} +
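// Reviewer note (illustrative sketch, not part of this patch): the eventData
// wrapper encodes the event type implicitly -- an add is queued as (nil, obj),
// a delete as (obj, nil) and an update as (old, new). A handler could recover
// the event kind with a helper like the hypothetical eventKind below, which
// mirrors the nil checks used by name() and namespace() above:
//
//	func eventKind[T metav1.Object](e *eventData[T]) string {
//		switch {
//		case reflect.ValueOf(e.old).IsNil():
//			return "add"
//		case reflect.ValueOf(e.new).IsNil():
//			return "delete"
//		default:
//			return "update"
//		}
//	}
//
+// NewController returns a new *Controller.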
+func NewController( + controllerName string, + netInfo util.NetInfo, + nbClient libovsdbclient.Client, + recorder record.EventRecorder, + nqosClient networkqosclientset.Interface, + nqosInformer networkqosinformer.NetworkQoSInformer, + namespaceInformer corev1informers.NamespaceInformer, + podInformer corev1informers.PodInformer, + nodeInformer corev1informers.NodeInformer, + nadInformer nadinformerv1.NetworkAttachmentDefinitionInformer, + addressSetFactory addressset.AddressSetFactory, + isPodScheduledinLocalZone func(*corev1.Pod) bool, + zone string) (*Controller, error) { + + c := &Controller{ + controllerName: controllerName, + NetInfo: netInfo, + nbClient: nbClient, + nqosClientSet: nqosClient, + addressSetFactory: addressSetFactory, + isPodScheduledinLocalZone: isPodScheduledinLocalZone, + zone: zone, + nqosCache: syncmap.NewSyncMap[*networkQoSState](), + } + + klog.V(5).Infof("Setting up event handlers for Network QoS controller %s", controllerName) + // setup nqos informers, listers, queue + c.nqosLister = nqosInformer.Lister() + c.nqosCacheSynced = nqosInformer.Informer().HasSynced + c.nqosQueue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemFastSlowRateLimiter[string](1*time.Second, 5*time.Second, 5), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "networkQoS"}, + ) + _, err := nqosInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNQOSAdd, + UpdateFunc: c.onNQOSUpdate, + DeleteFunc: c.onNQOSDelete, + })) + if err != nil { + return nil, fmt.Errorf("could not add Event Handler for nqosInformer during network qos controller initialization, %w", err) + } + + klog.V(5).Info("Setting up event handlers for Namespaces in Network QoS controller") + c.nqosNamespaceLister = namespaceInformer.Lister() + c.nqosNamespaceSynced = namespaceInformer.Informer().HasSynced + c.nqosNamespaceQueue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemFastSlowRateLimiter[*eventData[*corev1.Namespace]](1*time.Second, 5*time.Second, 5), + workqueue.TypedRateLimitingQueueConfig[*eventData[*corev1.Namespace]]{Name: "nqosNamespaces"}, + ) + _, err = namespaceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNQOSNamespaceAdd, + UpdateFunc: c.onNQOSNamespaceUpdate, + DeleteFunc: c.onNQOSNamespaceDelete, + })) + if err != nil { + return nil, fmt.Errorf("could not add Event Handler for namespace Informer during network qos controller initialization, %w", err) + } + + klog.V(5).Info("Setting up event handlers for Pods in Network QoS controller") + c.nqosPodLister = podInformer.Lister() + c.nqosPodSynced = podInformer.Informer().HasSynced + c.nqosPodQueue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemFastSlowRateLimiter[*eventData[*corev1.Pod]](1*time.Second, 5*time.Second, 5), + workqueue.TypedRateLimitingQueueConfig[*eventData[*corev1.Pod]]{Name: "nqosPods"}, + ) + _, err = podInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNQOSPodAdd, + UpdateFunc: c.onNQOSPodUpdate, + DeleteFunc: c.onNQOSPodDelete, + })) + if err != nil { + return nil, fmt.Errorf("could not add Event Handler for pod Informer during network qos controller initialization, %w", err) + } + + klog.V(5).Info("Setting up event handlers for Nodes in Network QoS controller") + c.nqosNodeLister = nodeInformer.Lister() + c.nqosNodeSynced = 
nodeInformer.Informer().HasSynced + c.nqosNodeQueue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemFastSlowRateLimiter[string](1*time.Second, 5*time.Second, 5), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "nqosNodes"}, + ) + _, err = nodeInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + UpdateFunc: c.onNQOSNodeUpdate, + })) + if err != nil { + return nil, fmt.Errorf("could not add Event Handler for node Informer during network qos controller initialization, %w", err) + } + + if nadInformer != nil { + c.nadLister = nadInformer.Lister() + c.nadSynced = nadInformer.Informer().HasSynced + } + + c.eventRecorder = recorder + return c, nil +} + +// Run will not return until stopCh is closed. workers determines how many +// objects (pods, namespaces, nqoses) will be handled in parallel. +func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting controller %s", c.controllerName) + + // Wait for the caches to be synced + klog.V(5).Info("Waiting for informer caches (networkqos,namespace,pod,node) to sync") + if !util.WaitForInformerCacheSyncWithTimeout(c.controllerName, stopCh, c.nqosCacheSynced, c.nqosNamespaceSynced, c.nqosPodSynced, c.nqosNodeSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for informer caches (networkqos,namespace,pod,node) to sync")) + return + } + if c.nadSynced != nil { + klog.V(5).Info("Waiting for net-attach-def informer cache to sync") + if !util.WaitForInformerCacheSyncWithTimeout(c.controllerName, stopCh, c.nadSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for net-attach-def informer cache to sync")) + return + } + } + + klog.Infof("Repairing Network QoSes") + // Run the repair function at startup so that we synchronize KAPI and OVNDBs + err := c.repairNetworkQoSes() + if err != nil { + klog.Errorf("Failed to repair Network QoS: %v", err) + } + + wg := &sync.WaitGroup{} + // Start the workers after the repair loop to avoid races + klog.V(5).Info("Starting Network QoS workers") + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + c.runNQOSWorker(wg) + }, time.Second, stopCh) + }() + } + + klog.V(5).Info("Starting Namespace Network QoS workers") + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + c.runNQOSNamespaceWorker(wg) + }, time.Second, stopCh) + }() + } + + klog.V(5).Info("Starting Pod Network QoS workers") + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + c.runNQOSPodWorker(wg) + }, time.Second, stopCh) + }() + } + + klog.V(5).Info("Starting Node Network QoS workers") + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + c.runNQOSNodeWorker(wg) + }, time.Second, stopCh) + }() + } + + <-stopCh + + klog.Infof("Shutting down controller %s", c.controllerName) + c.nqosQueue.ShutDown() + c.nqosNamespaceQueue.ShutDown() + c.nqosPodQueue.ShutDown() + c.nqosNodeQueue.ShutDown() + c.teardownMetricsCollector() + wg.Wait() +} + +// worker runs a worker thread that just dequeues items, processes them, and +// marks them done. You may run as many of these in parallel as you wish; the +// workqueue guarantees that they will not end up processing the same object +// at the same time. 
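// Reviewer note (illustrative sketch, not part of this patch): wired up from a
// network controller, construction and startup would look roughly like the
// following; nbClient, recorder, nqosClient, the informers, addrSetFactory,
// isLocalZonePod, zone and stopCh are all hypothetical variables owned by the
// caller, and the nil argument skips the optional NAD informer:
//
//	nqosController, err := NewController("default-network-controller",
//		&util.DefaultNetInfo{}, nbClient, recorder, nqosClient, nqosInformer,
//		nsInformer, podInformer, nodeInformer, nil, addrSetFactory,
//		isLocalZonePod, zone)
//	if err != nil {
//		return err
//	}
//	go nqosController.Run(1, stopCh)
//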
+func (c *Controller) runNQOSWorker(wg *sync.WaitGroup) { + for c.processNextNQOSWorkItem(wg) { + } +} + +func (c *Controller) runNQOSNamespaceWorker(wg *sync.WaitGroup) { + for c.processNextNQOSNamespaceWorkItem(wg) { + } +} + +func (c *Controller) runNQOSPodWorker(wg *sync.WaitGroup) { + for c.processNextNQOSPodWorkItem(wg) { + } +} + +func (c *Controller) runNQOSNodeWorker(wg *sync.WaitGroup) { + for c.processNextNQOSNodeWorkItem(wg) { + } +} + +// handlers + +// onNQOSAdd queues the NQOS for processing. +func (c *Controller) onNQOSAdd(obj any) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.nqosQueue.Add(key) +} + +// onNQOSUpdate updates the NQOS Selector in the cache and queues the NQOS for processing. +func (c *Controller) onNQOSUpdate(oldObj, newObj any) { + oldNQOS, ok := oldObj.(*networkqosapi.NetworkQoS) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting NetworkQoS but received %T", oldObj)) + return + } + newNQOS, ok := newObj.(*networkqosapi.NetworkQoS) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting NetworkQoS but received %T", newObj)) + return + } + // don't process resync or objects that are marked for deletion + if oldNQOS.ResourceVersion == newNQOS.ResourceVersion || + !newNQOS.GetDeletionTimestamp().IsZero() { + return + } + if reflect.DeepEqual(oldNQOS.Spec, newNQOS.Spec) { + return + } + key, err := cache.MetaNamespaceKeyFunc(newObj) + if err == nil { + // updates to NQOS object should be very rare, once put in place they usually stay the same + klog.V(4).Infof("Updating Network QoS %s: nqosSpec %v", + key, newNQOS.Spec) + c.nqosQueue.Add(key) + } +} + +// onNQOSDelete queues the NQOS for processing. +func (c *Controller) onNQOSDelete(obj interface{}) { + key, err := cache.MetaNamespaceKeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + c.nqosQueue.Add(key) +} + +// onNQOSNamespaceAdd queues the namespace for processing. +func (c *Controller) onNQOSNamespaceAdd(obj interface{}) { + ns, ok := obj.(*corev1.Namespace) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Namespace but received %T", obj)) + return + } + if ns == nil { + utilruntime.HandleError(fmt.Errorf("empty namespace")) + return + } + c.nqosNamespaceQueue.Add(newEventData(nil, ns)) +} + +// onNQOSNamespaceUpdate queues the namespace for processing. +func (c *Controller) onNQOSNamespaceUpdate(oldObj, newObj interface{}) { + oldNamespace, ok := oldObj.(*corev1.Namespace) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Namespace but received %T", oldObj)) + return + } + newNamespace, ok := newObj.(*corev1.Namespace) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Namespace but received %T", newObj)) + return + } + if oldNamespace == nil || newNamespace == nil { + utilruntime.HandleError(fmt.Errorf("empty namespace")) + return + } + if oldNamespace.ResourceVersion == newNamespace.ResourceVersion || !newNamespace.GetDeletionTimestamp().IsZero() { + return + } + // If the labels have not changed, then there's no change that we care about: return. 
+ oldNamespaceLabels := labels.Set(oldNamespace.Labels) + newNamespaceLabels := labels.Set(newNamespace.Labels) + if labels.Equals(oldNamespaceLabels, newNamespaceLabels) { + return + } + klog.V(5).Infof("Namespace %s labels have changed: %v", newNamespace.Name, newNamespaceLabels) + c.nqosNamespaceQueue.Add(newEventData(oldNamespace, newNamespace)) +} + +// onNQOSNamespaceDelete queues the namespace for processing. +func (c *Controller) onNQOSNamespaceDelete(obj interface{}) { + ns, ok := obj.(*corev1.Namespace) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj)) + return + } + ns, ok = tombstone.Obj.(*corev1.Namespace) + if !ok { + utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a Namespace: %#v", tombstone.Obj)) + return + } + } + if ns != nil { + c.nqosNamespaceQueue.Add(newEventData(ns, nil)) + } +} + +// onNQOSPodAdd queues the pod for processing. +func (c *Controller) onNQOSPodAdd(obj interface{}) { + pod, ok := obj.(*corev1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Pod but received %T", obj)) + return + } + if pod == nil { + utilruntime.HandleError(fmt.Errorf("empty pod")) + return + } + c.nqosPodQueue.Add(newEventData(nil, pod)) +} + +// onNQOSPodUpdate queues the pod for processing. +func (c *Controller) onNQOSPodUpdate(oldObj, newObj interface{}) { + oldPod, ok := oldObj.(*corev1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Pod but received %T", oldObj)) + return + } + newPod, ok := newObj.(*corev1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Pod but received %T", newObj)) + return + } + if oldPod == nil || newPod == nil { + utilruntime.HandleError(fmt.Errorf("empty pod")) + return + } + // don't process resync or objects that are marked for deletion + if oldPod.ResourceVersion == newPod.ResourceVersion || + !newPod.GetDeletionTimestamp().IsZero() { + return + } + // We only care about pod's label changes, pod's IP changes + // pod going into completed state and pod getting scheduled and switching + // zones. Rest of the cases we may return + oldPodLabels := labels.Set(oldPod.Labels) + newPodLabels := labels.Set(newPod.Labels) + oldPodIPs, _ := util.GetPodIPsOfNetwork(oldPod, c.NetInfo) + newPodIPs, _ := util.GetPodIPsOfNetwork(newPod, c.NetInfo) + oldPodCompleted := util.PodCompleted(oldPod) + newPodCompleted := util.PodCompleted(newPod) + if labels.Equals(oldPodLabels, newPodLabels) && + // check for podIP changes (in case we allocate and deallocate) or for dualstack conversion + // it will also catch the pod update that will come when LSPAdd and IPAM allocation are done + len(oldPodIPs) == len(newPodIPs) && + oldPodCompleted == newPodCompleted { + return + } + klog.V(5).Infof("Handling update event for pod %s/%s, labels %v, podIPs: %v, PodCompleted?: %v", newPod.Namespace, newPod.Name, newPodLabels, newPodIPs, newPodCompleted) + c.nqosPodQueue.Add(newEventData(oldPod, newPod)) +} + +// onNQOSPodDelete queues the pod for processing. 
+func (c *Controller) onNQOSPodDelete(obj interface{}) { + pod, ok := obj.(*corev1.Pod) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj)) + return + } + pod, ok = tombstone.Obj.(*corev1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a Pod: %#v", tombstone.Obj)) + return + } + } + if pod != nil { + c.nqosPodQueue.Add(newEventData(pod, nil)) + } +} + +// onNQOSNodeUpdate queues the node for processing. +func (c *Controller) onNQOSNodeUpdate(oldObj, newObj interface{}) { + oldNode, ok := oldObj.(*corev1.Node) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Node but received %T", oldObj)) + return + } + newNode, ok := newObj.(*corev1.Node) + if !ok { + utilruntime.HandleError(fmt.Errorf("expecting Node but received %T", newObj)) + return + } + // don't process resync or objects that are marked for deletion + if oldNode.ResourceVersion == newNode.ResourceVersion || + !newNode.GetDeletionTimestamp().IsZero() { + return + } + // node not in local zone, no need to process + if !c.isNodeInLocalZone(oldNode) && !c.isNodeInLocalZone(newNode) { + return + } + // only care about node's zone name changes + if !util.NodeZoneAnnotationChanged(oldNode, newNode) { + return + } + klog.V(4).Infof("Node %s zone changed from %s to %s", newNode.Name, oldNode.Annotations[util.OvnNodeZoneName], newNode.Annotations[util.OvnNodeZoneName]) + key, err := cache.MetaNamespaceKeyFunc(newObj) + if err == nil { + c.nqosNodeQueue.Add(key) + } +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_namespace.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_namespace.go new file mode 100644 index 0000000000..f3aa34d909 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_namespace.go @@ -0,0 +1,182 @@ +package networkqos + +import ( + "fmt" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + nqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" +) + +func (c *Controller) processNextNQOSNamespaceWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + eventData, shutdown := c.nqosNamespaceQueue.Get() + if shutdown { + return false + } + defer c.nqosNamespaceQueue.Done(eventData) + + if err := c.syncNetworkQoSNamespace(eventData); err != nil { + if c.nqosNamespaceQueue.NumRequeues(eventData) < maxRetries { + klog.Errorf("%s: Failed to reconcile namespace %s: %v", c.controllerName, eventData.name(), err) + c.nqosNamespaceQueue.AddRateLimited(eventData) + return true + } + utilruntime.HandleError(fmt.Errorf("failed to reconcile namespace %s: %v", eventData.name(), err)) + } + c.nqosNamespaceQueue.Forget(eventData) + return true +} + +// syncNetworkQoSNamespace checks if the namespace change affects any NetworkQoS +func (c *Controller) syncNetworkQoSNamespace(eventData *eventData[*corev1.Namespace]) error { + startTime := time.Now() + klog.V(5).Infof("Reconciling namespace event for %s ", eventData.name()) + defer func() { + klog.V(5).Infof("Finished reconciling namespace %s, took %v", eventData.name(), time.Since(startTime)) + }() + nqosNames, err := c.getNetworkQosForNamespaceChange(eventData) + 
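// Reviewer note (illustrative, not part of this patch): the fan-out here is
// purely label driven. With the NetworkQoS fixture used in the tests below,
// whose egress classifier carries the namespace selector app=app1, a label
// edit such as
//
//	ns.Labels = map[string]string{"app": "app1"} // previously e.g. {"app": "other"}
//
// flips the result of namespaceMatchesEgressRule/egressSelectionChanged, so
// the owning NetworkQoS key is returned here and requeued on nqosQueue for a
// full reconcile.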
if err != nil { + return err + } + for nqosName := range nqosNames { + c.nqosQueue.Add(nqosName) + } + recordNamespaceReconcileDuration(c.controllerName, time.Since(startTime).Milliseconds()) + return nil +} + +// getNetworkQosForNamespaceChange returns the set of NetworkQoS names that are affected by the namespace change +func (c *Controller) getNetworkQosForNamespaceChange(eventData *eventData[*corev1.Namespace]) (sets.Set[string], error) { + networkQoSes := sets.Set[string]{} + nqoses, err := c.getAllNetworkQoSes() + if err != nil { + return nil, err + } + for _, nqos := range nqoses { + ns := eventData.new + if ns == nil { + ns = eventData.old + } + // check if any network selector matches the namespace, or ns label change affects the network selection + if namespaceMatchesNetworkSelector(ns, nqos) || networkSelectionChanged(nqos, eventData.new, eventData.old) { + networkQoSes.Insert(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + continue + } + // check if any egress rule matches the namespace, or ns label change affects the egress selection + if namespaceMatchesEgressRule(ns, nqos) || egressSelectionChanged(nqos, eventData.new, eventData.old) { + networkQoSes.Insert(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + } + } + return networkQoSes, nil +} + +// namespaceMatchesNetworkSelector checks if the namespace matches any of the network selectors in the NetworkQoS +func namespaceMatchesNetworkSelector(namespace *corev1.Namespace, nqos *nqosv1alpha1.NetworkQoS) bool { + for _, selector := range nqos.Spec.NetworkSelectors { + var nsSelector *metav1.LabelSelector + switch { + case selector.NetworkAttachmentDefinitionSelector != nil: + if selector.NetworkAttachmentDefinitionSelector.NamespaceSelector.Size() == 0 { + // namespace selector is empty, match all + return true + } + nsSelector = &selector.NetworkAttachmentDefinitionSelector.NamespaceSelector + case selector.PrimaryUserDefinedNetworkSelector != nil: + if selector.PrimaryUserDefinedNetworkSelector.NamespaceSelector.Size() == 0 { + // namespace selector is empty, match all + return true + } + nsSelector = &selector.PrimaryUserDefinedNetworkSelector.NamespaceSelector + case selector.SecondaryUserDefinedNetworkSelector != nil: + if selector.SecondaryUserDefinedNetworkSelector.NamespaceSelector.Size() == 0 { + // namespace selector is empty, match all + return true + } + nsSelector = &selector.SecondaryUserDefinedNetworkSelector.NamespaceSelector + } + if nsSelector == nil { + continue + } + if ls, err := metav1.LabelSelectorAsSelector(nsSelector); err != nil { + klog.Errorf("%s/%s - failed to convert namespace selector %s : %v", nqos.Namespace, nqos.Name, nsSelector.String(), err) + } else if ls != nil && ls.Matches(labels.Set(namespace.Labels)) { + return true + } + } + return false +} + +func namespaceMatchesEgressRule(namespace *corev1.Namespace, nqos *nqosv1alpha1.NetworkQoS) bool { + for _, egress := range nqos.Spec.Egress { + for _, dest := range egress.Classifier.To { + if dest.NamespaceSelector == nil || dest.NamespaceSelector.Size() == 0 { + // namespace selector is empty, match all + return true + } + if ls, err := metav1.LabelSelectorAsSelector(dest.NamespaceSelector); err != nil { + klog.Errorf("%s/%s - failed to convert egress namespace selector %s: %v", nqos.Namespace, nqos.Name, dest.NamespaceSelector.String(), err) + } else if ls != nil && ls.Matches(labels.Set(namespace.Labels)) { + return true + } + } + } + return false +} + +// check if namespace change causes the network selection change +func 
networkSelectionChanged(nqos *nqosv1alpha1.NetworkQoS, new *corev1.Namespace, old *corev1.Namespace) bool { + for _, selector := range nqos.Spec.NetworkSelectors { + var nsSelector *metav1.LabelSelector + switch selector.NetworkSelectionType { + case crdtypes.PrimaryUserDefinedNetworks: + if selector.PrimaryUserDefinedNetworkSelector != nil { + nsSelector = &selector.PrimaryUserDefinedNetworkSelector.NamespaceSelector + } + case crdtypes.SecondaryUserDefinedNetworks: + if selector.SecondaryUserDefinedNetworkSelector != nil { + nsSelector = &selector.SecondaryUserDefinedNetworkSelector.NamespaceSelector + } + case crdtypes.NetworkAttachmentDefinitions: + if selector.NetworkAttachmentDefinitionSelector != nil { + nsSelector = &selector.NetworkAttachmentDefinitionSelector.NamespaceSelector + } + } + if nsSelector == nil { + continue + } + if ls, err := metav1.LabelSelectorAsSelector(nsSelector); err != nil { + // namespace selector is not valid, skip this selector + klog.Errorf("%s/%s - failed to convert namespace selector %s: %v", nqos.Namespace, nqos.Name, nsSelector.String(), err) + } else if old != nil && new != nil { + return ls.Matches(labels.Set(old.Labels)) != ls.Matches(labels.Set(new.Labels)) + } + } + return false +} + +func egressSelectionChanged(nqos *nqosv1alpha1.NetworkQoS, new *corev1.Namespace, old *corev1.Namespace) bool { + for _, egress := range nqos.Spec.Egress { + for _, dest := range egress.Classifier.To { + if dest.NamespaceSelector == nil || dest.NamespaceSelector.Size() == 0 { + // empty namespace selector won't make difference + continue + } + if nsSelector, err := metav1.LabelSelectorAsSelector(dest.NamespaceSelector); err != nil { + klog.Errorf("Failed to convert namespace selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + } else if old != nil && new != nil { + return nsSelector.Matches(labels.Set(old.Labels)) != nsSelector.Matches(labels.Set(new.Labels)) + } + } + } + return false +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_node.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_node.go new file mode 100644 index 0000000000..8a78883044 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_node.go @@ -0,0 +1,68 @@ +package networkqos + +import ( + "fmt" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +func (c *Controller) processNextNQOSNodeWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + nqosNodeKey, quit := c.nqosNodeQueue.Get() + if quit { + return false + } + defer c.nqosNodeQueue.Done(nqosNodeKey) + err := c.syncNetworkQoSNode(nqosNodeKey) + if err == nil { + c.nqosNodeQueue.Forget(nqosNodeKey) + return true + } + + utilruntime.HandleError(fmt.Errorf("%v failed with: %v", nqosNodeKey, err)) + + if c.nqosNodeQueue.NumRequeues(nqosNodeKey) < maxRetries { + c.nqosNodeQueue.AddRateLimited(nqosNodeKey) + return true + } + + c.nqosNodeQueue.Forget(nqosNodeKey) + return true +} + +// syncNetworkQoSNode triggers resync of all the NetworkQoSes when a node moves in/out of local zone +func (c *Controller) syncNetworkQoSNode(key string) error { + startTime := time.Now() + _, nodeName, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + klog.V(5).Infof("Processing sync for Node %s in Network QoS controller", nodeName) + + defer func() { + klog.V(5).Infof("Finished syncing Node %s Network 
QoS controller: took %v", nodeName, time.Since(startTime)) + }() + // node moves in/out of local zone, resync all the NetworkQoSes + for _, nqosName := range c.nqosCache.GetKeys() { + ns, name, _ := cache.SplitMetaNamespaceKey(nqosName) + if nqos, err := c.nqosLister.NetworkQoSes(ns).Get(name); err != nil { + klog.Errorf("Failed to get NetworkQoS %s: %v", nqosName, err) + } else if nqos != nil { + c.nqosQueue.Add(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + } + } + return nil +} + +// isNodeInLocalZone returns whether the provided node is in a zone local to the zone controller +func (c *Controller) isNodeInLocalZone(node *corev1.Node) bool { + return util.GetNodeZone(node) == c.zone +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_ovnnb.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_ovnnb.go new file mode 100644 index 0000000000..82eed9b07e --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_ovnnb.go @@ -0,0 +1,280 @@ +package networkqos + +import ( + "errors" + "fmt" + "slices" + "strconv" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/libovsdb/ovsdb" + + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +func (c *Controller) findLogicalSwitch(switchName string) (*nbdb.LogicalSwitch, error) { + if lsws, err := libovsdbops.FindLogicalSwitchesWithPredicate(c.nbClient, func(item *nbdb.LogicalSwitch) bool { + return item.Name == switchName + }); err != nil { + return nil, fmt.Errorf("failed to look up logical switch %s: %w", switchName, err) + } else if len(lsws) > 0 { + return lsws[0], nil + } + return nil, fmt.Errorf("logical switch %s not found", switchName) +} + +func (c *Controller) addQoSToLogicalSwitch(qosState *networkQoSState, switchName string) error { + // find lsw + lsw, err := c.findLogicalSwitch(switchName) + if err != nil { + return err + } + // construct qoses + qoses := []*nbdb.QoS{} + ipv4Enabled, ipv6Enabled := c.IPMode() + for index, rule := range qosState.EgressRules { + dbIDs := qosState.getDbObjectIDs(c.controllerName, index) + qos := &nbdb.QoS{ + Action: map[string]int{}, + Bandwidth: map[string]int{}, + Direction: nbdb.QoSDirectionToLport, + ExternalIDs: dbIDs.GetExternalIDs(), + Match: generateNetworkQoSMatch(qosState, rule, ipv4Enabled, ipv6Enabled), + Priority: rule.Priority, + } + if c.IsSecondary() { + qos.ExternalIDs[types.NetworkExternalID] = c.GetNetworkName() + } + if rule.Dscp >= 0 { + qos.Action[nbdb.QoSActionDSCP] = rule.Dscp + } + if rule.Rate != nil && *rule.Rate > 0 { + qos.Bandwidth[nbdb.QoSBandwidthRate] = *rule.Rate + } + if rule.Burst != nil && *rule.Burst > 0 { + qos.Bandwidth[nbdb.QoSBandwidthBurst] = *rule.Burst + } + qoses = append(qoses, qos) + } + ops := []ovsdb.Operation{} + ops, err = libovsdbops.CreateOrUpdateQoSesOps(c.nbClient, ops, qoses...) + if err != nil { + return fmt.Errorf("failed to create QoS operations for %s/%s: %w", qosState.namespace, qosState.name, err) + } + // identify qoses need binding to lsw + newQoSes := []*nbdb.QoS{} + for _, qos := range qoses { + if slices.Contains(lsw.QOSRules, qos.UUID) { + continue + } + newQoSes = append(newQoSes, qos) + } + if len(newQoSes) > 0 { + ops, err = libovsdbops.AddQoSesToLogicalSwitchOps(c.nbClient, ops, switchName, newQoSes...) 
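// Reviewer note (illustrative, not part of this patch): for the first egress
// rule of the NetworkQoS fixture used in the tests below (DSCP 50, rate 10000,
// burst 100000), the loop above produces roughly this row on an IPv4-only
// default network; the Match string and the Priority mapping are computed by
// helpers outside this function (the test expects 11000 for spec priority 100):
//
//	&nbdb.QoS{
//		Action:    map[string]int{nbdb.QoSActionDSCP: 50},
//		Bandwidth: map[string]int{nbdb.QoSBandwidthRate: 10000, nbdb.QoSBandwidthBurst: 100000},
//		Direction: nbdb.QoSDirectionToLport,
//		Priority:  11000,
//		Match:     "ip4.src == {$<src-addrset>} && ... && tcp && tcp.dst == {8080,8081}",
//	}
//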
+ if err != nil { + return fmt.Errorf("failed to create operations to add QoS to switch %s: %w", switchName, err) + } + } + if _, err := libovsdbops.TransactAndCheck(c.nbClient, ops); err != nil { + return fmt.Errorf("failed to execute ops to add QoSes to switch %s, err: %w", switchName, err) + } + return nil +} + +// remove qos from a list of logical switches +func (c *Controller) removeQoSFromLogicalSwitches(qosState *networkQoSState, switchNames []string) error { + qoses, err := libovsdbops.FindQoSesWithPredicate(c.nbClient, func(qos *nbdb.QoS) bool { + return qos.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == c.controllerName && + qos.ExternalIDs[libovsdbops.OwnerTypeKey.String()] == string(libovsdbops.NetworkQoSOwnerType) && + qos.ExternalIDs[libovsdbops.ObjectNameKey.String()] == qosState.getObjectNameKey() + }) + if err != nil { + return fmt.Errorf("failed to look up QoSes for %s/%s: %v", qosState.namespace, qosState.name, err) + } + unbindQoSOps := []ovsdb.Operation{} + // remove qos rules from logical switches + for _, lsName := range switchNames { + ops, err := libovsdbops.RemoveQoSesFromLogicalSwitchOps(c.nbClient, nil, lsName, qoses...) + if err != nil { + return fmt.Errorf("failed to get ops to remove QoSes from switches %s for NetworkQoS %s/%s: %w", lsName, qosState.namespace, qosState.name, err) + } + unbindQoSOps = append(unbindQoSOps, ops...) + } + if _, err := libovsdbops.TransactAndCheck(c.nbClient, unbindQoSOps); err != nil { + return fmt.Errorf("failed to execute ops to remove QoSes from logical switches, err: %w", err) + } + return nil +} + +func (c *Controller) cleanupStaleOvnObjects(qosState *networkQoSState) error { + // find existing QoSes owned by NetworkQoS + existingQoSes, err := libovsdbops.FindQoSesWithPredicate(c.nbClient, func(qos *nbdb.QoS) bool { + return qos.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == c.controllerName && + qos.ExternalIDs[libovsdbops.OwnerTypeKey.String()] == string(libovsdbops.NetworkQoSOwnerType) && + qos.ExternalIDs[libovsdbops.ObjectNameKey.String()] == qosState.getObjectNameKey() + }) + if err != nil { + return fmt.Errorf("error looking up existing QoSes for %s/%s: %v", qosState.namespace, qosState.name, err) + } + staleSwitchQoSMap := map[string][]*nbdb.QoS{} + totalNumOfRules := len(qosState.EgressRules) + for _, qos := range existingQoSes { + index := qos.ExternalIDs[libovsdbops.RuleIndex.String()] + numIndex, convError := strconv.Atoi(index) + indexWithinRange := false + if index != "" && convError == nil && numIndex < totalNumOfRules { + // rule index is valid + indexWithinRange = true + } + // qos is considered stale since the index is out of range + // get switches that reference to the stale qos + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(c.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return util.SliceHasStringItem(ls.QOSRules, qos.UUID) + }) + if err != nil { + if !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("error looking up logical switches by qos: %w", err) + } + continue + } + // build map of switch->list(qos) + for _, ls := range switches { + if _, qosInUse := qosState.SwitchRefs.Load(ls.Name); indexWithinRange && qosInUse { + continue + } + qosList := staleSwitchQoSMap[ls.Name] + if qosList == nil { + qosList = []*nbdb.QoS{} + } + qosList = append(qosList, qos) + staleSwitchQoSMap[ls.Name] = qosList + } + } + allOps, err := c.findStaleAddressSets(qosState) + if err != nil { + return fmt.Errorf("failed to get ops to delete stale address sets for 
NetworkQoS %s/%s: %w", qosState.namespace, qosState.name, err) + } + // remove stale qos rules from logical switches + for lsName, qoses := range staleSwitchQoSMap { + var switchOps []ovsdb.Operation + switchOps, err = libovsdbops.RemoveQoSesFromLogicalSwitchOps(c.nbClient, switchOps, lsName, qoses...) + if err != nil { + return fmt.Errorf("failed to get ops to remove stale QoSes from switches %s for NetworkQoS %s/%s: %w", lsName, qosState.namespace, qosState.name, err) + } + allOps = append(allOps, switchOps...) + } + // commit allOps + if _, err := libovsdbops.TransactAndCheck(c.nbClient, allOps); err != nil { + return fmt.Errorf("failed to execute ops to clean up stale QoSes, err: %w", err) + } + return nil +} + +// delete ovn QoSes generated from network qos +func (c *Controller) deleteByName(ovnObjectName string) error { + qoses, err := libovsdbops.FindQoSesWithPredicate(c.nbClient, func(qos *nbdb.QoS) bool { + return qos.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == c.controllerName && + qos.ExternalIDs[libovsdbops.OwnerTypeKey.String()] == string(libovsdbops.NetworkQoSOwnerType) && + qos.ExternalIDs[libovsdbops.ObjectNameKey.String()] == ovnObjectName + }) + if err != nil { + return fmt.Errorf("failed to look up QoSes by name %s: %v", ovnObjectName, err) + } + if err = c.deleteOvnQoSes(qoses); err != nil { + return fmt.Errorf("error cleaning up OVN QoSes for %s: %v", ovnObjectName, err) + } + // remove address sets + if err = c.deleteAddressSet(ovnObjectName); err != nil { + return fmt.Errorf("error cleaning up address sets for %s: %w", ovnObjectName, err) + } + return nil +} + +// delete a list of ovn QoSes +func (c *Controller) deleteOvnQoSes(qoses []*nbdb.QoS) error { + switchQoSMap := map[string][]*nbdb.QoS{} + for _, qos := range qoses { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(c.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return util.SliceHasStringItem(ls.QOSRules, qos.UUID) + }) + if err != nil { + if !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to look up logical switches by qos: %w", err) + } + continue + } + // get switches that reference to the stale qoses + for _, ls := range switches { + qosList := switchQoSMap[ls.Name] + if qosList == nil { + qosList = []*nbdb.QoS{} + } + qosList = append(qosList, qos) + switchQoSMap[ls.Name] = qosList + } + } + unbindQoSOps := []ovsdb.Operation{} + // remove qos rules from logical switches + for lsName, qoses := range switchQoSMap { + ops, err := libovsdbops.RemoveQoSesFromLogicalSwitchOps(c.nbClient, nil, lsName, qoses...) + if err != nil { + return fmt.Errorf("failed to get ops to remove QoSes from switch %s: %w", lsName, err) + } + unbindQoSOps = append(unbindQoSOps, ops...) + } + if _, err := libovsdbops.TransactAndCheck(c.nbClient, unbindQoSOps); err != nil { + return fmt.Errorf("failed to execute ops to remove QoSes from logical switches, err: %w", err) + } + // delete qos + delQoSOps, err := libovsdbops.DeleteQoSesOps(c.nbClient, nil, qoses...) 
+ if err != nil { + return fmt.Errorf("failed to get ops to delete QoSes: %w", err) + } + if _, err := libovsdbops.TransactAndCheck(c.nbClient, delQoSOps); err != nil { + return fmt.Errorf("failed to execute ops to delete QoSes, err: %w", err) + } + return nil +} + +func (c *Controller) deleteAddressSet(qosName string) error { + // find address sets by networkqos name & controller name + delAddrSetOps, err := libovsdbops.DeleteAddressSetsWithPredicateOps(c.nbClient, nil, func(item *nbdb.AddressSet) bool { + return item.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == c.controllerName && + item.ExternalIDs[libovsdbops.OwnerTypeKey.String()] == string(libovsdbops.NetworkQoSOwnerType) && + item.ExternalIDs[libovsdbops.ObjectNameKey.String()] == qosName + }) + if err != nil { + return fmt.Errorf("failed to get ops to delete address sets: %w", err) + } + if _, err := libovsdbops.TransactAndCheck(c.nbClient, delAddrSetOps); err != nil { + return fmt.Errorf("failed to execute ops to delete address sets, err: %w", err) + } + return nil +} + +// find stale address sets +// 1. find address sets owned by NetworkQoS +// 2. get address sets in use +// 3. compare and identify those not in use +func (c *Controller) findStaleAddressSets(qosState *networkQoSState) ([]ovsdb.Operation, error) { + staleAddressSets := []*nbdb.AddressSet{} + addrsets, err := libovsdbops.FindAddressSetsWithPredicate(c.nbClient, func(item *nbdb.AddressSet) bool { + return item.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == c.controllerName && + item.ExternalIDs[libovsdbops.OwnerTypeKey.String()] == string(libovsdbops.NetworkQoSOwnerType) && + item.ExternalIDs[libovsdbops.ObjectNameKey.String()] == qosState.getObjectNameKey() + }) + if err != nil { + return nil, fmt.Errorf("failed to look up address sets: %w", err) + } + addrsetInUse := qosState.getAddressSetHashNames() + for _, addrset := range addrsets { + addrsetName := addrset.GetName() + if !slices.Contains(addrsetInUse, addrsetName) { + staleAddressSets = append(staleAddressSets, addrset) + } + } + return libovsdbops.DeleteAddressSetsOps(c.nbClient, nil, staleAddressSets...) 
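// Reviewer note (illustrative, not part of this patch): the ownership
// predicates in this file all identify NetworkQoS-owned rows by the same
// external IDs, so a QoS or address set created by this controller carries
// roughly:
//
//	ExternalIDs: map[string]string{
//		libovsdbops.OwnerControllerKey.String(): c.controllerName,           // e.g. "default-network-controller"
//		libovsdbops.OwnerTypeKey.String():       string(libovsdbops.NetworkQoSOwnerType),
//		libovsdbops.ObjectNameKey.String():      qosState.getObjectNameKey(), // namespaced NetworkQoS name
//	}
//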
+} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_pod.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_pod.go new file mode 100644 index 0000000000..625a8549ee --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_pod.go @@ -0,0 +1,237 @@ +package networkqos + +import ( + "fmt" + "strings" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + nqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" +) + +func (c *Controller) processNextNQOSPodWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + eventData, shutdown := c.nqosPodQueue.Get() + if shutdown { + return false + } + defer c.nqosPodQueue.Done(eventData) + + if err := c.syncNetworkQoSPod(eventData); err != nil { + if c.nqosPodQueue.NumRequeues(eventData) < maxRetries { + c.nqosPodQueue.AddRateLimited(eventData) + return true + } + klog.Errorf("%s: Failed to reconcile pod %s/%s: %v", c.controllerName, eventData.namespace(), eventData.name(), err) + utilruntime.HandleError(fmt.Errorf("failed to reconcile pod %s/%s: %v", eventData.namespace(), eventData.name(), err)) + } + c.nqosPodQueue.Forget(eventData) + return true +} + +// syncNetworkQoSPod runs the main reconciliation logic every time +// we dequeue a key from the nqosPodQueue cache +func (c *Controller) syncNetworkQoSPod(eventData *eventData[*corev1.Pod]) error { + startTime := time.Now() + nqosNames, err := c.getNetworkQosForPodChange(eventData) + if err != nil { + return err + } + for nqosName := range nqosNames { + c.nqosQueue.Add(nqosName) + } + recordPodReconcileDuration(c.controllerName, time.Since(startTime).Milliseconds()) + return nil +} + +// setPodForNQOS will check if the pod matches the source selector or a destination selector +// - match source: add the ip to source address set, bind qos rule to the switch +// - match dest: add the ip to the destination address set +func (c *Controller) setPodForNQOS(pod *corev1.Pod, nqosState *networkQoSState, namespace *corev1.Namespace, addressSetMap map[string]sets.Set[string]) error { + addresses, err := getPodAddresses(pod, c.NetInfo) + if err == nil && len(addresses) == 0 { + // pod either is not attached to this network, or hasn't been annotated with addresses yet, return without retry + klog.V(6).Infof("Pod %s/%s doesn't have addresses on network %s, skip NetworkQoS processing", pod.Namespace, pod.Name, c.GetNetworkName()) + return nil + } else if err != nil { + return fmt.Errorf("failed to parse addresses for pod %s/%s, network %s, err: %v", pod.Namespace, pod.Name, c.GetNetworkName(), err) + } + fullPodName := joinMetaNamespaceAndName(pod.Namespace, pod.Name) + // is pod in this zone + if c.isPodScheduledinLocalZone(pod) { + if matchSource := nqosState.matchSourceSelector(pod); matchSource { + // pod's labels match source selector + if err = nqosState.configureSourcePod(c, pod, addresses); err == nil { + populateAddresses(addressSetMap, nqosState.SrcAddrSet.GetName(), addresses) + } + } else { + // pod's labels don't match selector, but it probably matched previously + err = nqosState.removePodFromSource(c, fullPodName, addresses) + } + if err != nil { + return err + } + } else { + klog.V(4).Infof("Pod %s is not scheduled in local zone, call remove to ensure it's not in source", fullPodName) + err = nqosState.removePodFromSource(c, fullPodName,
addresses) + if err != nil { + return err + } + } + return reconcilePodForDestinations(nqosState, namespace, pod, addresses, addressSetMap) +} + +func reconcilePodForDestinations(nqosState *networkQoSState, podNs *corev1.Namespace, pod *corev1.Pod, addresses []string, addressSetMap map[string]sets.Set[string]) error { + fullPodName := joinMetaNamespaceAndName(pod.Namespace, pod.Name) + for _, rule := range nqosState.EgressRules { + for index, dest := range rule.Classifier.Destinations { + if dest.PodSelector == nil && dest.NamespaceSelector == nil { + continue + } + if dest.matchPod(podNs, pod, nqosState.namespace) { + // add pod address to address set + if err := dest.addPod(pod.Namespace, pod.Name, addresses); err != nil { + return fmt.Errorf("failed to add addresses {%s} to dest address set %s for NetworkQoS %s/%s, rule index %d: %v", strings.Join(addresses, ","), dest.DestAddrSet.GetName(), nqosState.namespace, nqosState.name, index, err) + } + populateAddresses(addressSetMap, dest.DestAddrSet.GetName(), addresses) + } else { + // no match, remove the pod if it's previously selected + if err := dest.removePod(fullPodName, addresses); err != nil { + return fmt.Errorf("failed to delete addresses {%s} from dest address set %s for NetworkQoS %s/%s, rule index %d: %v", strings.Join(addresses, ","), dest.DestAddrSet.GetName(), nqosState.namespace, nqosState.name, index, err) + } + } + } + } + return nil +} + +func (c *Controller) getNetworkQosForPodChange(eventData *eventData[*corev1.Pod]) (sets.Set[string], error) { + var pod *corev1.Pod + if eventData.new != nil { + pod = eventData.new + } else { + pod = eventData.old + } + podNs, err := c.nqosNamespaceLister.Get(pod.Namespace) + if err != nil { + return nil, fmt.Errorf("failed to get namespace %s: %v", pod.Namespace, err) + } + nqoses, err := c.getAllNetworkQoSes() + if err != nil { + return nil, err + } + affectedNetworkQoSes := sets.Set[string]{} + for _, nqos := range nqoses { + if podMatchesSourceSelector(pod, nqos) { + affectedNetworkQoSes.Insert(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + continue + } + // check if pod matches any egress + for _, egress := range nqos.Spec.Egress { + if podMatchesEgressSelector(podNs, pod, nqos, &egress) { + affectedNetworkQoSes.Insert(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + continue + } + } + if podSelectionChanged(nqos, eventData.new, eventData.old) { + affectedNetworkQoSes.Insert(joinMetaNamespaceAndName(nqos.Namespace, nqos.Name)) + } + } + return affectedNetworkQoSes, nil +} + +func podMatchesSourceSelector(pod *corev1.Pod, nqos *nqosv1alpha1.NetworkQoS) bool { + if nqos.Namespace != pod.Namespace { + return false + } + if nqos.Spec.PodSelector.Size() == 0 { + return true + } + podSelector, err := metav1.LabelSelectorAsSelector(&nqos.Spec.PodSelector) + if err != nil { + klog.Errorf("Failed to convert pod selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + return false + } + return podSelector.Matches(labels.Set(pod.Labels)) +} + +func podMatchesEgressSelector(podNs *corev1.Namespace, pod *corev1.Pod, nqos *nqosv1alpha1.NetworkQoS, egress *nqosv1alpha1.Rule) bool { + var nsSelector labels.Selector + var podSelector labels.Selector + var err error + match := false + for _, dest := range egress.Classifier.To { + if dest.NamespaceSelector != nil { + if nsSelector, err = metav1.LabelSelectorAsSelector(dest.NamespaceSelector); err != nil { + klog.Errorf("Failed to convert namespace selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + continue + } + } + if 
dest.PodSelector != nil { + if podSelector, err = metav1.LabelSelectorAsSelector(dest.PodSelector); err != nil { + klog.Errorf("Failed to convert pod selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + continue + } + } + switch { + case nsSelector != nil && podSelector != nil: + match = nsSelector.Matches(labels.Set(podNs.Labels)) && podSelector.Matches(labels.Set(pod.Labels)) + case nsSelector == nil && podSelector != nil: + match = pod.Namespace == nqos.Namespace && podSelector.Matches(labels.Set(pod.Labels)) + case nsSelector != nil && podSelector == nil: + match = nsSelector.Matches(labels.Set(podNs.Labels)) + default: //nsSelector == nil && podSelector == nil: + match = false + } + if match { + return true + } + } + return false +} + +func podSelectionChanged(nqos *nqosv1alpha1.NetworkQoS, new *corev1.Pod, old *corev1.Pod) bool { + if new == nil || old == nil { + return false + } + if nqos.Spec.PodSelector.Size() > 0 { + if podSelector, err := metav1.LabelSelectorAsSelector(&nqos.Spec.PodSelector); err != nil { + klog.Errorf("Failed to convert pod selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + } else if podSelector.Matches(labels.Set(new.Labels)) != podSelector.Matches(labels.Set(old.Labels)) { + return true + } + } + for _, egress := range nqos.Spec.Egress { + for _, dest := range egress.Classifier.To { + if dest.PodSelector == nil { + continue + } + if podSelector, err := metav1.LabelSelectorAsSelector(dest.PodSelector); err != nil { + klog.Errorf("Failed to convert pod selector in %s/%s: %v", nqos.Namespace, nqos.Name, err) + } else if podSelector.Matches(labels.Set(new.Labels)) != podSelector.Matches(labels.Set(old.Labels)) { + return true + } + } + } + return false +} + +func populateAddresses(addressSetMap map[string]sets.Set[string], name string, addresses []string) { + if len(addresses) == 0 { + return + } + addressSet := addressSetMap[name] + if addressSet == nil { + addressSet = sets.New[string]() + } + addressSet.Insert(addresses...) 
+ addressSetMap[name] = addressSet +} diff --git a/go-controller/pkg/ovn/controller/network_qos/network_qos_test.go b/go-controller/pkg/ovn/controller/network_qos/network_qos_test.go new file mode 100644 index 0000000000..fd92922479 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/network_qos_test.go @@ -0,0 +1,1275 @@ +package networkqos + +import ( + "context" + "fmt" + "slices" + "strconv" + "sync" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + nqostype "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + ovnk8stesting "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Custom NetInfo implementations for testing +type primaryNetInfoWrapper struct { + util.NetInfo +} + +func (n *primaryNetInfoWrapper) IsPrimaryNetwork() bool { + return true +} + +func (n *primaryNetInfoWrapper) IsSecondary() bool { + return false +} + +func (n *primaryNetInfoWrapper) IsDefault() bool { + return false +} + +func (n *primaryNetInfoWrapper) GetNetInfo() util.NetInfo { + return n +} + +type secondaryNetInfoWrapper struct { + util.NetInfo +} + +func (n *secondaryNetInfoWrapper) IsPrimaryNetwork() bool { + return false +} + +func (n *secondaryNetInfoWrapper) IsSecondary() bool { + return true +} + +func (n *secondaryNetInfoWrapper) IsDefault() bool { + return false +} + +func (n *secondaryNetInfoWrapper) GetNetInfo() util.NetInfo { + return n +} + +func init() { + config.IPv4Mode = true + config.IPv6Mode = false + config.OVNKubernetesFeature.EnableNetworkQoS = true + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableInterconnect = false // set via tableEntrySetup +} + +var ( + defaultControllerName = "default-network-controller" + streamControllerName = "stream-network-controller" + watchFactory *factory.WatchFactory + stopChan chan (struct{}) + nbClient libovsdbclient.Client + nbsbCleanup *libovsdbtest.Context + fakeKubeClient kubernetes.Interface + fakeNQoSClient networkqosclientset.Interface + wg sync.WaitGroup + defaultAddrsetFactory addressset.AddressSetFactory + streamAddrsetFactory addressset.AddressSetFactory + + nqosNamespace = "network-qos-test" + nqosName = "my-network-qos" + clientPodName = "client-pod" + + app1Namespace = "app1-ns" + app3Namespace = "app3-ns" + port8080 = int32(8080) + port8081 = int32(8081) + port9090 = int32(9090) +) + +func TestNetworkQoS(t *testing.T) { + RegisterFailHandler(Fail) + 
RunSpecs(t, "NetworkQoS Controller") +} + +func tableEntrySetup(enableInterconnect bool) { + config.OVNKubernetesFeature.EnableInterconnect = enableInterconnect + + ns0 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nqosNamespace, + Labels: map[string]string{ + "app": "client", + }, + }, + } + ns1 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: app1Namespace, + Labels: map[string]string{ + "app": "app1", + }, + }, + } + ns3 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: app3Namespace, + Labels: map[string]string{ + "app": "app3", + }, + }, + } + nqos := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: nqosName, + }, + Spec: nqostype.Spec{ + Priority: 100, + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "client", + }, + }, + Egress: []nqostype.Rule{ + { + DSCP: 50, + Bandwidth: nqostype.Bandwidth{ + Rate: 10000, + Burst: 100000, + }, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "component": "service1", + }, + }, + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app1", + }, + }, + }, + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "128.116.0.0/17", + Except: []string{ + "128.116.0.0", + "128.116.0.255", + }, + }, + }, + }, + Ports: []*nqostype.Port{ + { + Protocol: "tcp", + Port: &port8080, + }, + { + Protocol: "tcp", + Port: &port8081, + }, + }, + }, + }, + { + DSCP: 51, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "component": "service3", + }, + }, + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app3", + }, + }, + }, + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "128.118.0.0/17", + Except: []string{ + "128.118.0.0", + "128.118.0.255", + }, + }, + }, + }, + Ports: []*nqostype.Port{ + { + Protocol: "tcp", + Port: &port8080, + }, + { + Protocol: "tcp", + Port: &port8081, + }, + { + Protocol: "udp", + Port: &port9090, + }, + { + Protocol: "udp", + Port: &port8080, + }, + }, + }, + }, + }, + }, + } + + node1 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{ + "k8s.ovn.org/zone-name": "node1", + }, + }, + } + + node2 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{ + "k8s.ovn.org/zone-name": "node2", + }, + }, + } + + clientPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: clientPodName, + Labels: map[string]string{ + "app": "client", + }, + Annotations: map[string]string{ + "k8s.ovn.org/pod-networks": `{"default/stream": {"ip_addresses":["10.128.2.3/26"],"mac_address":"0a:58:0a:80:02:03"}, "default":{"ip_addresses":["10.192.177.4/26"],"mac_address":"0a:58:0a:c0:b1:04","gateway_ips":["10.192.177.1"],"routes":[{"dest":"10.192.0.0/16","nextHop":"10.192.177.1"},{"dest":"10.223.0.0/16","nextHop":"10.192.177.1"},{"dest":"100.64.0.0/16","nextHop":"10.192.177.1"}],"mtu":"1500","ip_address":"10.192.177.4/26","gateway_ip":"10.192.177.1"}}`, + "k8s.v1.cni.cncf.io/networks": `[{"interface":"net1","name":"stream","namespace":"default"}]`, + }, + }, + Spec: corev1.PodSpec{ + HostNetwork: false, + NodeName: "node1", + }, + } + + nad := ovnk8stesting.GenerateNAD("stream", "stream", "default", types.Layer3Topology, "10.128.2.0/16/24", types.NetworkRoleSecondary) + nad.Labels = 
map[string]string{ + "name": "stream", + } + + initialDB := &libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + Name: "node1", + }, + &nbdb.LogicalSwitch{ + Name: "node2", + }, + &nbdb.LogicalSwitch{ + Name: "stream_node1", + }, + }, + } + + ovnClientset := util.GetOVNClientset(ns0, ns1, ns3, node1, node2, clientPod, nqos, nad) + fakeKubeClient = ovnClientset.KubeClient + fakeNQoSClient = ovnClientset.NetworkQoSClient + initEnv(ovnClientset, initialDB) + // init controller for default network + initNetworkQoSController(&util.DefaultNetInfo{}, defaultAddrsetFactory, defaultControllerName, enableInterconnect) + // init controller for stream nad + streamImmutableNadInfo, err := util.ParseNADInfo(nad) + Expect(err).NotTo(HaveOccurred()) + streamNadInfo := util.NewMutableNetInfo(streamImmutableNadInfo) + streamNadInfo.AddNADs("default/stream") + initNetworkQoSController(streamNadInfo, streamAddrsetFactory, streamControllerName, enableInterconnect) +} + +var _ = AfterEach(func() { + shutdownController() + if nbsbCleanup != nil { + nbsbCleanup.Cleanup() + nbsbCleanup = nil + } +}) + +var _ = Describe("NetworkQoS Controller", func() { + + var _ = Context("With different interconnect configurations", func() { + + DescribeTable("When starting controller with NetworkQoS, Pod and Node objects", + func(enableInterconnect bool) { + tableEntrySetup(enableInterconnect) + + By("creates address sets for source and destination pod selectors") + { + eventuallyExpectAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "src", "0", defaultControllerName) + eventuallyExpectAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName) + eventuallyExpectAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName) + } + + By("creates QoS rules in ovn nb") + { + qos0 := eventuallyExpectQoS(defaultControllerName, nqosNamespace, nqosName, 0) + qos1 := eventuallyExpectQoS(defaultControllerName, nqosNamespace, nqosName, 1) + eventuallySwitchHasQoS("node1", qos0) + eventuallySwitchHasQoS("node1", qos1) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "src", "0", defaultControllerName, "10.192.177.4") + sourceAddrSet, err := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "src", "0", defaultControllerName) + Expect(err).NotTo(HaveOccurred()) + dst1AddrSet, err1 := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName) + Expect(err1).NotTo(HaveOccurred()) + srcHashName4, _ := sourceAddrSet.GetASHashNames() + dst1HashName4, _ := dst1AddrSet.GetASHashNames() + Expect(qos0.Match).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && (ip4.dst == {$%s} || (ip4.dst == 128.116.0.0/17 && ip4.dst != {128.116.0.0,128.116.0.255})) && tcp && tcp.dst == {8080,8081}", srcHashName4, dst1HashName4))) + Expect(qos0.Action).To(ContainElement(50)) + Expect(qos0.Priority).To(Equal(11000)) + Expect(qos0.Bandwidth).To(ContainElements(10000, 100000)) + dst3AddrSet, err3 := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName) + Expect(err3).NotTo(HaveOccurred()) + dst3HashName4, _ := dst3AddrSet.GetASHashNames() + Expect(qos1.Match).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && (ip4.dst == {$%s} || (ip4.dst == 128.118.0.0/17 && ip4.dst != {128.118.0.0,128.118.0.255})) && ((tcp && tcp.dst == {8080,8081}) || (udp && udp.dst == {9090,8080}))", srcHashName4, dst3HashName4))) + } + + app1Pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ 
+ Namespace: app1Namespace, + Name: "app1-pod", + Labels: map[string]string{ + "component": "service1", + }, + Annotations: map[string]string{ + "k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.194.188.4/26"],"mac_address":"0a:58:0a:c2:bc:04","gateway_ips":["10.194.188.1"],"routes":[{"dest":"10.194.0.0/16","nextHop":"10.194.188.1"},{"dest":"10.223.0.0/16","nextHop":"10.194.188.1"},{"dest":"100.64.0.0/16","nextHop":"10.194.188.1"}],"mtu":"1500","ip_address":"10.194.188.4/26","gateway_ip":"10.194.188.1"}}`, + }, + }, + Spec: corev1.PodSpec{ + HostNetwork: false, + NodeName: "node2", + }, + } + + By("adds IP to destination address set for matching pod") + { + _, err := fakeKubeClient.CoreV1().Pods(app1Pod.Namespace).Create(context.TODO(), app1Pod, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + + By("updates match strings if egress rules change") + nqosUpdate, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Get(context.TODO(), nqosName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + nqosUpdate.ResourceVersion = time.Now().String() + nqosUpdate.Spec.Egress[1].Classifier.To[1].IPBlock.Except = nil + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + sourceAddrSet, err := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "src", "0", defaultControllerName) + Expect(err).NotTo(HaveOccurred()) + dst1AddrSet, err1 := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName) + Expect(err1).NotTo(HaveOccurred()) + srcHashName4, _ := sourceAddrSet.GetASHashNames() + dst1HashName4, _ := dst1AddrSet.GetASHashNames() + + Eventually(func() string { + qos, err := findQoS(defaultControllerName, nqosNamespace, nqosName, 0) + if err != nil { + return err.Error() + } + return qos.Match + }).WithTimeout(10 * time.Second).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && (ip4.dst == {$%s} || (ip4.dst == 128.116.0.0/17 && ip4.dst != {128.116.0.0,128.116.0.255})) && tcp && tcp.dst == {8080,8081}", srcHashName4, dst1HashName4))) + + dst3AddrSet, err3 := findAddressSet(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName) + Expect(err3).NotTo(HaveOccurred()) + dst3HashName4, _ := dst3AddrSet.GetASHashNames() + Eventually(func() string { + qos, err := findQoS(defaultControllerName, nqosNamespace, nqosName, 1) + if err != nil { + return err.Error() + } + return qos.Match + }).WithTimeout(10 * time.Second).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && (ip4.dst == {$%s} || (ip4.dst == 128.118.0.0/17 && ip4.dst != {128.118.0.0,128.118.0.255})) && ((tcp && tcp.dst == {8080,8081}) || (udp && udp.dst == {9090,8080}))", srcHashName4, dst3HashName4))) + } + + By("removes IP from destination address set if pod's labels don't match the selector") + { + updatePod := app1Pod.DeepCopy() + updatePod.Labels["component"] = "dummy" + updatePod.ResourceVersion = time.Now().String() + _, err := fakeKubeClient.CoreV1().Pods(app1Pod.Namespace).Update(context.TODO(), updatePod, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("adds IP to destination address set again if pod's labels match the selector") + { + updatePod := app1Pod.DeepCopy() 
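+ // restore the original label value; the controller should then re-add the pod's IP to the destination address set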
+ updatePod.Labels["component"] = "service1" + _, err := fakeKubeClient.CoreV1().Pods(app1Pod.Namespace).Update(context.TODO(), updatePod, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("removes IP from destination address set if target namespace labels don't match the selector") + { + ns, err := fakeKubeClient.CoreV1().Namespaces().Get(context.TODO(), app1Namespace, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + ns.ResourceVersion = time.Now().String() + ns.Labels["app"] = "dummy" + _, err = fakeKubeClient.CoreV1().Namespaces().Update(context.TODO(), ns, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("adds IP to destination address set again if namespace's labels match the selector") + { + ns, err := fakeKubeClient.CoreV1().Namespaces().Get(context.TODO(), app1Namespace, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + ns.ResourceVersion = time.Now().String() + ns.Labels["app"] = "app1" + _, err = fakeKubeClient.CoreV1().Namespaces().Update(context.TODO(), ns, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("removes IP from destination address set if namespace selector changes") + { + nqosUpdate, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Get(context.TODO(), nqosName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + nqosUpdate.Spec.Egress[0].Classifier.To[0].NamespaceSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "dummy", + }, + } + nqosUpdate.ResourceVersion = time.Now().String() + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("adds IP to destination address set if namespace selector is restored") + { + nqosUpdate, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Get(context.TODO(), nqosName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + nqosUpdate.Spec.Egress[0].Classifier.To[0].NamespaceSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app1", + }, + } + nqosUpdate.ResourceVersion = time.Now().String() + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + app3Pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: app3Namespace, + Name: "app3-pod", + Labels: map[string]string{ + "component": "service3", + }, + Annotations: map[string]string{ + "k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.195.188.4/26"],"mac_address":"0a:58:0a:c3:bc:04","gateway_ips":["10.195.188.1"],"routes":[{"dest":"10.195.0.0/16","nextHop":"10.195.188.1"},{"dest":"10.223.0.0/16","nextHop":"10.195.188.1"},{"dest":"100.64.0.0/16","nextHop":"10.195.188.1"}],"mtu":"1500","ip_address":"10.195.188.4/26","gateway_ip":"10.195.188.1"}}`, + }, + }, + 
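+ // app3Pod matches the second egress rule's destination selectors (component=service3 pods in a namespace labeled app=app3)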
Spec: corev1.PodSpec{ + HostNetwork: false, + NodeName: "node2", + }, + } + + By("adds IP to destination address set of the second rule for matching pod") + { + _, err := fakeKubeClient.CoreV1().Pods(app3Pod.Namespace).Create(context.TODO(), app3Pod, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName, "10.195.188.4") + } + + By("adds new QoS rule to ovn nb when a new Egress rule is added") + { + nqosUpdate, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Get(context.TODO(), nqosName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + nqosUpdate.Spec.Egress = append(nqosUpdate.Spec.Egress, nqostype.Rule{ + DSCP: 102, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app1", + }, + }, + }, + }, + }, + }) + nqosUpdate.ResourceVersion = time.Now().String() + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyExpectQoS(defaultControllerName, nqosNamespace, nqosName, 2) + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "src", "0", defaultControllerName, "10.192.177.4") + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName, "10.195.188.4") + eventuallyAddressSetHas(defaultAddrsetFactory, nqosNamespace, nqosName, "2", "0", defaultControllerName, "10.194.188.4") + } + + nqos4StreamNet := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: "stream-qos", + }, + Spec: nqostype.Spec{ + NetworkSelectors: []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.NetworkAttachmentDefinitions, + NetworkAttachmentDefinitionSelector: &crdtypes.NetworkAttachmentDefinitionSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "unknown", + }, + }, + }, + }, + }, + Priority: 100, + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "client", + }, + }, + Egress: []nqostype.Rule{ + { + DSCP: 50, + Bandwidth: nqostype.Bandwidth{ + Rate: 10000, + Burst: 100000, + }, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "128.115.0.0/17", + Except: []string{ + "128.115.0.0", + "128.115.0.255", + }, + }, + }, + }, + }, + }, + }, + }, + } + + By("will not handle NetworkQos with unknown NetworkAttachmentDefinition in spec") + { + _, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Create(context.TODO(), nqos4StreamNet, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, "stream-qos", 0) + } + + By("will not populate source address set NetworkQos with incorrect namespace selector in spec") + { + nqos4StreamNet.Spec.NetworkSelectors = []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.NetworkAttachmentDefinitions, + NetworkAttachmentDefinitionSelector: &crdtypes.NetworkAttachmentDefinitionSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "unknown", + }, + }, + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "stream", + }, + 
}, + }, + }, + } + nqos4StreamNet.ResourceVersion = time.Now().String() + _, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqos4StreamNet, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(streamAddrsetFactory, nqosNamespace, "stream-qos", "src", "0", streamControllerName, "10.128.2.3") + } + + By("handles NetworkQos on secondary network") + { + nqos4StreamNet.Spec.NetworkSelectors = []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.NetworkAttachmentDefinitions, + NetworkAttachmentDefinitionSelector: &crdtypes.NetworkAttachmentDefinitionSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "stream", + }, + }, + }, + }, + } + nqos4StreamNet.ResourceVersion = time.Now().String() + _, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqos4StreamNet, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + qos := eventuallyExpectQoS(streamControllerName, nqosNamespace, "stream-qos", 0) + eventuallySwitchHasQoS("stream_node1", qos) + eventuallyAddressSetHas(streamAddrsetFactory, nqosNamespace, "stream-qos", "src", "0", streamControllerName, "10.128.2.3") + } + + By("uses namespace's address set as source if pod selector is not provided in source") + { + dbIDs := libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNamespace, defaultControllerName, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: nqosNamespace, + }) + addrset, err := defaultAddrsetFactory.EnsureAddressSet(dbIDs) + Expect(err).NotTo(HaveOccurred()) + err = addrset.AddAddresses([]string{"10.194.188.4"}) + Expect(err).NotTo(HaveOccurred()) + nqosWithoutSrcSelector := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: "no-source-selector", + }, + Spec: nqostype.Spec{ + Priority: 100, + Egress: []nqostype.Rule{ + { + DSCP: 50, + Bandwidth: nqostype.Bandwidth{ + Rate: 10000, + Burst: 100000, + }, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "128.115.0.0/17", + Except: []string{ + "128.115.0.0", + "123.123.123.123", + }, + }, + }, + }, + }, + }, + }, + }, + } + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Create(context.TODO(), nqosWithoutSrcSelector, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + qos := eventuallyExpectQoS(defaultControllerName, nqosNamespace, "no-source-selector", 0) + v4HashName, _ := addrset.GetASHashNames() + Expect(qos.Match).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && ip4.dst == 128.115.0.0/17 && ip4.dst != {128.115.0.0,123.123.123.123}", v4HashName))) + } + + By("clear QoS attributes of existing NetworkQoS and make sure that is proper") + { + nqosWithoutSrcSelector := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: "no-source-selector", + }, + Spec: nqostype.Spec{ + Priority: 1, + Egress: []nqostype.Rule{ + { + DSCP: 50, + // Bandwidth: nqostype.Bandwidth{}, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "128.115.0.0/17", + Except: []string{ + "128.115.0.0", + "123.123.123.123", + }, + }, + }, + }, + }, + }, + }, + }, + } + nqosWithoutSrcSelector.ResourceVersion = time.Now().String() + _, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosWithoutSrcSelector, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + dbIDs := 
libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNamespace, defaultControllerName, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: nqosNamespace, + }) + addrset, err := defaultAddrsetFactory.EnsureAddressSet(dbIDs) + Expect(err).NotTo(HaveOccurred()) + v4HashName, _ := addrset.GetASHashNames() + + // Ensure that QoS priority and Bandwidth have been properly changed by OVN + var qos *nbdb.QoS + Eventually(func() bool { + qos, err = findQoS(defaultControllerName, nqosNamespace, "no-source-selector", 0) + Expect(err).NotTo(HaveOccurred()) + Expect(qos).NotTo(BeNil()) + return qos.Priority == 10010 && len(qos.Bandwidth) == 0 + }).WithTimeout(10 * time.Second).WithPolling(1 * time.Second).Should(BeTrue()) + Expect(qos.Match).Should(Equal(fmt.Sprintf("ip4.src == {$%s} && ip4.dst == 128.115.0.0/17 && ip4.dst != {128.115.0.0,123.123.123.123}", v4HashName))) + } + + By("removes IP from destination address set if pod is deleted") + { + err := fakeKubeClient.CoreV1().Pods(app1Pod.Namespace).Delete(context.TODO(), app1Pod.Name, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(defaultAddrsetFactory, nqosNamespace, nqosName, "0", "0", defaultControllerName, "10.194.188.4") + } + + By("removes IP from destination address set of the second rule if namespace is deleted") + { + err := fakeKubeClient.CoreV1().Namespaces().Delete(context.TODO(), app3Pod.Namespace, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyAddressSetHasNo(defaultAddrsetFactory, nqosNamespace, nqosName, "1", "0", defaultControllerName, "10.195.188.4") + err = fakeKubeClient.CoreV1().Pods(app3Pod.Namespace).Delete(context.TODO(), app3Pod.Name, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + By("deletes stale QoS from ovn nb when Egress rule is deleted") + { + qos2, err1 := findQoS(defaultControllerName, nqosNamespace, nqosName, 2) + Expect(err1).NotTo(HaveOccurred()) + nqosUpdate, err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Get(context.TODO(), nqosName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + nqosUpdate.ResourceVersion = time.Now().String() + nqosUpdate.Spec.Egress = slices.Delete(nqosUpdate.Spec.Egress, 1, 2) + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Update(context.TODO(), nqosUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallySwitchHasNoQoS("node1", qos2) + eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, nqosName, 2) + } + + By("unbinds QoS rule from logical switch when no source pods is selected") + { + qos0, err0 := findQoS(defaultControllerName, nqosNamespace, nqosName, 0) + Expect(err0).NotTo(HaveOccurred()) + qos1, err1 := findQoS(defaultControllerName, nqosNamespace, nqosName, 1) + Expect(err1).NotTo(HaveOccurred()) + // qos should be present, as pod is not yet deleted + eventuallySwitchHasQoS("node1", qos0) + eventuallySwitchHasQoS("node1", qos1) + err := fakeKubeClient.CoreV1().Pods(nqosNamespace).Delete(context.TODO(), clientPodName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + // qos should be unbound from switch + eventuallySwitchHasNoQoS("node1", qos0) + eventuallySwitchHasNoQoS("node1", qos1) + } + + By("deletes QoS after NetworkQoS object is deleted") + { + err := fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Delete(context.TODO(), nqosName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, nqosName, 0) + 
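+ // the QoS row for rule index 1 must be garbage-collected as well once the parent NetworkQoS is gone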
eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, nqosName, 1) + } + + By("generates correct logical switch name for localnet topology") + { + localnetNad := ovnk8stesting.GenerateNAD("netwk1", "netwk1", "default", types.LocalnetTopology, "10.129.0.0/16", types.NetworkRoleSecondary) + localnetImmutableNadInfo, err := util.ParseNADInfo(localnetNad) + Expect(err).NotTo(HaveOccurred()) + localnetNadInfo := util.NewMutableNetInfo(localnetImmutableNadInfo) + localnetNadInfo.AddNADs("default/netwk1") + ctrl := initNetworkQoSController(localnetNadInfo, addressset.NewFakeAddressSetFactory("netwk1-controller"), "netwk1-controller", enableInterconnect) + lsName := ctrl.getLogicalSwitchName("dummy") + Expect(lsName).To(Equal("netwk1_ovn_localnet_switch")) + } + + By("generates correct logical switch name for layer2 topology") + { + layer2Nad := ovnk8stesting.GenerateNAD("netwk2", "netwk2", "default", types.Layer2Topology, "10.130.0.0/16", types.NetworkRoleSecondary) + layer2ImmutableNadInfo, err := util.ParseNADInfo(layer2Nad) + Expect(err).NotTo(HaveOccurred()) + layer2NadInfo := util.NewMutableNetInfo(layer2ImmutableNadInfo) + layer2NadInfo.AddNADs("default/netwk2") + ctrl := initNetworkQoSController(layer2NadInfo, addressset.NewFakeAddressSetFactory("netwk2-controller"), "netwk2-controller", enableInterconnect) + lsName := ctrl.getLogicalSwitchName("dummy") + Expect(lsName).To(Equal("netwk2_ovn_layer2_switch")) + } + + By("handles NetworkQoS with PrimaryUserDefinedNetworks selector") + { + // Create a NetworkQoS targeting primary networks + nqosPrimaryNet := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: "primary-network-qos", + }, + Spec: nqostype.Spec{ + NetworkSelectors: []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.PrimaryUserDefinedNetworks, + PrimaryUserDefinedNetworkSelector: &crdtypes.PrimaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app1", + }, + }, + }, + }, + }, + Priority: 200, + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "client", + }, + }, + Egress: []nqostype.Rule{ + { + DSCP: 40, + Bandwidth: nqostype.Bandwidth{ + Rate: 20000, + Burst: 200000, + }, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "192.168.0.0/24", + }, + }, + }, + Ports: []*nqostype.Port{ + { + Protocol: "TCP", + Port: &port8080, + }, + }, + }, + }, + }, + }, + } + + // Create primary network controller + primaryNad := ovnk8stesting.GenerateNAD("primary", "primary", "default", types.Layer3Topology, "10.140.0.0/16", types.NetworkRolePrimary) + primaryImmutableNadInfo, err := util.ParseNADInfo(primaryNad) + Expect(err).NotTo(HaveOccurred()) + primaryNadInfo := util.NewMutableNetInfo(primaryImmutableNadInfo) + primaryNadInfo.AddNADs("default/primary") + + // Create the primary network logical switch + primarySwitch := &nbdb.LogicalSwitch{ + Name: "primary_node1", + } + err = libovsdbops.CreateOrUpdateLogicalSwitch(nbClient, primarySwitch) + Expect(err).NotTo(HaveOccurred()) + + // Wrap the NetInfo with our custom implementation that returns true for IsPrimaryNetwork() + primNetWrapper := &primaryNetInfoWrapper{NetInfo: primaryNadInfo} + initNetworkQoSController(primNetWrapper, addressset.NewFakeAddressSetFactory("primary-controller"), "primary-controller", enableInterconnect) + + // Ensure app1 namespace exists before testing primary networks + ns, err := 
fakeKubeClient.CoreV1().Namespaces().Get(context.TODO(), app1Namespace, metav1.GetOptions{}) + if err != nil || ns == nil { + klog.Infof("Creating app1 namespace for primary networks test") + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: app1Namespace, + Labels: map[string]string{ + "app": "app1", + }, + }, + } + _, err = fakeKubeClient.CoreV1().Namespaces().Create(context.TODO(), ns, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + // Create a client pod in the network QoS namespace + clientPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: clientPodName, + Labels: map[string]string{ + "app": "client", + }, + Annotations: map[string]string{ + "k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.192.177.4/26"],"mac_address":"0a:58:0a:c2:bc:04","gateway_ips":["10.192.177.1"],"routes":[{"dest":"10.192.0.0/16","nextHop":"10.192.177.1"},{"dest":"10.223.0.0/16","nextHop":"10.192.177.1"},{"dest":"100.64.0.0/16","nextHop":"10.192.177.1"}],"mtu":"1500","ip_address":"10.192.177.4/26","gateway_ip":"10.192.177.1"}}`, + }, + }, + Spec: corev1.PodSpec{ + HostNetwork: false, + NodeName: "node1", + }, + } + _, err = fakeKubeClient.CoreV1().Pods(nqosNamespace).Create(context.TODO(), clientPod, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Create and verify NetworkQoS + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Create(context.TODO(), nqosPrimaryNet, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // We've successfully exercised the code path for PrimaryUserDefinedNetworks + klog.Infof("Code path for PrimaryUserDefinedNetworks has been successfully tested") + + // Confirm the primary controller is processing this NetworkQoS and not the default controller + eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, "primary-network-qos", 0) + + // Clean up + err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Delete(context.TODO(), "primary-network-qos", metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + By("handles NetworkQoS with SecondaryUserDefinedNetworks selector") + { + // Create a NetworkQoS targeting secondary networks + nqosSecondaryNet := &nqostype.NetworkQoS{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: "secondary-network-qos", + }, + Spec: nqostype.Spec{ + NetworkSelectors: []crdtypes.NetworkSelector{ + { + NetworkSelectionType: crdtypes.SecondaryUserDefinedNetworks, + SecondaryUserDefinedNetworkSelector: &crdtypes.SecondaryUserDefinedNetworkSelector{ + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "app3", + }, + }, + }, + }, + }, + Priority: 300, + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "client", + }, + }, + Egress: []nqostype.Rule{ + { + DSCP: 30, + Bandwidth: nqostype.Bandwidth{ + Rate: 30000, + Burst: 300000, + }, + Classifier: nqostype.Classifier{ + To: []nqostype.Destination{ + { + IPBlock: &networkingv1.IPBlock{ + CIDR: "172.16.0.0/24", + }, + }, + }, + Ports: []*nqostype.Port{ + { + Protocol: "UDP", + Port: &port9090, + }, + }, + }, + }, + }, + }, + } + + // Create secondary network controller + secondaryNad := ovnk8stesting.GenerateNAD("secondary", "secondary", "default", types.Layer3Topology, "10.150.0.0/16", types.NetworkRoleSecondary) + secondaryImmutableNadInfo, err := util.ParseNADInfo(secondaryNad) + Expect(err).NotTo(HaveOccurred()) + secondaryNadInfo := util.NewMutableNetInfo(secondaryImmutableNadInfo) + 
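+ // register the NAD key so the controller treats default/secondary as one of its own attachments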
secondaryNadInfo.AddNADs("default/secondary") + + // Create the secondary network logical switch + secondarySwitch := &nbdb.LogicalSwitch{ + Name: "secondary_node1", + } + err = libovsdbops.CreateOrUpdateLogicalSwitch(nbClient, secondarySwitch) + Expect(err).NotTo(HaveOccurred()) + + // Wrap the NetInfo with our custom implementation that returns true for IsSecondary() + secNetWrapper := &secondaryNetInfoWrapper{NetInfo: secondaryNadInfo} + initNetworkQoSController(secNetWrapper, addressset.NewFakeAddressSetFactory("secondary-controller"), "secondary-controller", enableInterconnect) + + // Ensure app3 namespace exists before testing secondary networks + ns, err := fakeKubeClient.CoreV1().Namespaces().Get(context.TODO(), app3Namespace, metav1.GetOptions{}) + if err != nil || ns == nil { + klog.Infof("Creating app3 namespace for secondary networks test") + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: app3Namespace, + Labels: map[string]string{ + "app": "app3", + }, + }, + } + _, err = fakeKubeClient.CoreV1().Namespaces().Create(context.TODO(), ns, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + // Make sure client pod exists + _, err = fakeKubeClient.CoreV1().Pods(nqosNamespace).Get(context.TODO(), clientPodName, metav1.GetOptions{}) + if err != nil { + // Create a client pod in the network QoS namespace + clientPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nqosNamespace, + Name: clientPodName, + Labels: map[string]string{ + "app": "client", + }, + Annotations: map[string]string{ + "k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.192.177.4/26"],"mac_address":"0a:58:0a:c2:bc:04","gateway_ips":["10.192.177.1"],"routes":[{"dest":"10.192.0.0/16","nextHop":"10.192.177.1"},{"dest":"10.223.0.0/16","nextHop":"10.192.177.1"},{"dest":"100.64.0.0/16","nextHop":"10.192.177.1"}],"mtu":"1500","ip_address":"10.192.177.4/26","gateway_ip":"10.192.177.1"}}`, + }, + }, + Spec: corev1.PodSpec{ + HostNetwork: false, + NodeName: "node1", + }, + } + _, err = fakeKubeClient.CoreV1().Pods(nqosNamespace).Create(context.TODO(), clientPod, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + // Create and verify NetworkQoS + _, err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Create(context.TODO(), nqosSecondaryNet, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // We've successfully exercised the code path for SecondaryUserDefinedNetworks + klog.Infof("Code path for SecondaryUserDefinedNetworks has been successfully tested") + + // Confirm the secondary controller is processing this NetworkQoS and not the default controller + eventuallyExpectNoQoS(defaultControllerName, nqosNamespace, "secondary-network-qos", 0) + + // Clean up + err = fakeNQoSClient.K8sV1alpha1().NetworkQoSes(nqosNamespace).Delete(context.TODO(), "secondary-network-qos", metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + }, + Entry("Interconnect Disabled", false), + Entry("Interconnect Enabled", true), + ) + }) +}) + +func eventuallyExpectAddressSet(addrsetFactory addressset.AddressSetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName string) { + Eventually(func() bool { + addrset, _ := findAddressSet(addrsetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName) + return addrset != nil + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("address set not found for %s/%s, rule %s, address block %s", nqosNamespace, nqosName, qosRuleIndex, 
ipBlockIndex)) +} + +func eventuallyAddressSetHas(addrsetFactory addressset.AddressSetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName, ip string) { + Eventually(func() bool { + addrset, _ := findAddressSet(addrsetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName) + if addrset == nil { + return false + } + ip4, _ := addrset.GetAddresses() + return slices.Contains(ip4, ip) + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("address set does not contain expected ip %s", ip)) +} + +func eventuallyAddressSetHasNo(addrsetFactory addressset.AddressSetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName, ip string) { + Eventually(func() bool { + addrset, _ := findAddressSet(addrsetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName) + if addrset == nil { + return true + } + ip4, _ := addrset.GetAddresses() + return !slices.Contains(ip4, ip) + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("address set still has unexpected ip %s", ip)) +} + +func findAddressSet(addrsetFactory addressset.AddressSetFactory, nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName string) (addressset.AddressSet, error) { + dbID := GetNetworkQoSAddrSetDbIDs(nqosNamespace, nqosName, qosRuleIndex, ipBlockIndex, controllerName) + return addrsetFactory.GetAddressSet(dbID) +} + +func eventuallyExpectQoS(controllerName, qosNamespace, qosName string, index int) *nbdb.QoS { + var qos *nbdb.QoS + Eventually(func() bool { + qos, _ = findQoS(controllerName, qosNamespace, qosName, index) + return qos != nil + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("QoS not found for %s/%s", qosNamespace, qosName)) + return qos +} + +func eventuallyExpectNoQoS(controllerName, qosNamespace, qosName string, index int) { + var qos *nbdb.QoS + Eventually(func() bool { + qos, _ = findQoS(controllerName, qosNamespace, qosName, index) + return qos == nil + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("Unexpected QoS found for %s/%s, index %d", qosNamespace, qosName, index)) +} + +func findQoS(controllerName, qosNamespace, qosName string, index int) (*nbdb.QoS, error) { + qosKey := joinMetaNamespaceAndName(qosNamespace, qosName, ":") + dbIDs := libovsdbops.NewDbObjectIDs(libovsdbops.NetworkQoS, controllerName, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: qosKey, + libovsdbops.RuleIndex: fmt.Sprintf("%d", index), + }) + predicate := libovsdbops.GetPredicate(dbIDs, func(item *nbdb.QoS) bool { + return item.ExternalIDs[libovsdbops.OwnerControllerKey.String()] == controllerName && + item.ExternalIDs[libovsdbops.ObjectNameKey.String()] == qosKey && + item.ExternalIDs[libovsdbops.RuleIndex.String()] == strconv.Itoa(index) + }) + qoses, err := libovsdbops.FindQoSesWithPredicate(nbClient, predicate) + if err != nil { + return nil, err + } + if len(qoses) == 1 { + return qoses[0], nil + } + return nil, nil +} + +func eventuallySwitchHasQoS(switchName string, qos *nbdb.QoS) { + var ls *nbdb.LogicalSwitch + Eventually(func() bool { + criteria := &nbdb.LogicalSwitch{ + Name: switchName, + } + ls, _ = libovsdbops.GetLogicalSwitch(nbClient, criteria) + return ls != nil && slices.Contains(ls.QOSRules, qos.UUID) + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("QoS rule %s not found in switch %s", qos.UUID, switchName)) +} + +func 
eventuallySwitchHasNoQoS(switchName string, qos *nbdb.QoS) { + var ls *nbdb.LogicalSwitch + Eventually(func() bool { + criteria := &nbdb.LogicalSwitch{ + Name: switchName, + } + ls, _ = libovsdbops.GetLogicalSwitch(nbClient, criteria) + return ls != nil && !slices.Contains(ls.QOSRules, qos.UUID) + }).WithTimeout(10*time.Second).WithPolling(1*time.Second).Should(BeTrue(), fmt.Sprintf("Unexpected QoS rule %s found in switch %s", qos.UUID, switchName)) +} + +func initEnv(clientset *util.OVNClientset, initialDB *libovsdbtest.TestSetup) { + var nbZoneFailed bool + var err error + stopChan = make(chan struct{}) + + watchFactory, err = factory.NewMasterWatchFactory( + &util.OVNMasterClientset{ + KubeClient: clientset.KubeClient, + NetworkQoSClient: clientset.NetworkQoSClient, + NetworkAttchDefClient: clientset.NetworkAttchDefClient, + }, + ) + Expect(err).NotTo(HaveOccurred()) + + if initialDB == nil { + initialDB = &libovsdbtest.TestSetup{} + } + nbClient, nbsbCleanup, err = libovsdbtest.NewNBTestHarness(*initialDB, nil) + Expect(err).NotTo(HaveOccurred()) + + _, err = libovsdbutil.GetNBZone(nbClient) + if err != nil { + nbZoneFailed = true + err = createTestNBGlobal(nbClient, "global") + Expect(err).NotTo(HaveOccurred()) + } + + if nbZoneFailed { + err = deleteTestNBGlobal(nbClient) + Expect(err).NotTo(HaveOccurred()) + } + defaultAddrsetFactory = addressset.NewFakeAddressSetFactory(defaultControllerName) + streamAddrsetFactory = addressset.NewFakeAddressSetFactory("stream-network-controller") +} + +func initNetworkQoSController(netInfo util.NetInfo, addrsetFactory addressset.AddressSetFactory, controllerName string, enableInterconnect bool) *Controller { + nqosController, err := NewController( + controllerName, + netInfo, + nbClient, + util.EventRecorder(fakeKubeClient), + fakeNQoSClient, + watchFactory.NetworkQoSInformer(), + watchFactory.NamespaceCoreInformer(), + watchFactory.PodCoreInformer(), + watchFactory.NodeCoreInformer(), + watchFactory.NADInformer(), + addrsetFactory, + func(pod *corev1.Pod) bool { + return pod.Spec.NodeName == "node1" || !enableInterconnect + }, "node1") + Expect(err).NotTo(HaveOccurred()) + err = watchFactory.Start() + Expect(err).NotTo(HaveOccurred()) + wg.Add(1) + go func() { + defer wg.Done() + nqosController.Run(1, stopChan) + }() + return nqosController +} + +func shutdownController() { + if watchFactory != nil { + watchFactory.Shutdown() + watchFactory = nil + } + if stopChan != nil { + close(stopChan) + stopChan = nil + } +} + +func createTestNBGlobal(nbClient libovsdbclient.Client, zone string) error { + nbGlobal := &nbdb.NBGlobal{Name: zone} + ops, err := nbClient.Create(nbGlobal) + if err != nil { + return err + } + + _, err = nbClient.Transact(context.Background(), ops...) + if err != nil { + return err + } + + return nil +} + +func deleteTestNBGlobal(nbClient libovsdbclient.Client) error { + p := func(_ *nbdb.NBGlobal) bool { + return true + } + ops, err := nbClient.WhereCache(p).Delete() + if err != nil { + return err + } + _, err = nbClient.Transact(context.Background(), ops...) 
+ if err != nil { + return err + } + + return nil +} diff --git a/go-controller/pkg/ovn/controller/network_qos/repair.go b/go-controller/pkg/ovn/controller/network_qos/repair.go new file mode 100644 index 0000000000..5c97326e60 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/repair.go @@ -0,0 +1,75 @@ +package networkqos + +import ( + "time" + + "k8s.io/klog/v2" + + networkqosapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" +) + +// repairNetworkQoSes is called at startup and, as the name suggests, +// aims to repair the NBDB logical objects +// that are created for the network qoses in the cluster +func (c *Controller) repairNetworkQoSes() error { + start := time.Now() + defer func() { + klog.Infof("Repairing network qos took %v", time.Since(start)) + }() + nqoses, err := c.getAllNetworkQoSes() + if err != nil { + return err + } + nqosMap := map[string]*networkqosapi.NetworkQoS{} + for _, nqos := range nqoses { + nqosMap[joinMetaNamespaceAndName(nqos.Namespace, nqos.Name, ":")] = nqos + } + + // delete stale ovn qos objects owned by NetworkQoS + if err := libovsdbops.DeleteQoSesWithPredicate(c.nbClient, func(qos *nbdb.QoS) bool { + if qos.ExternalIDs[libovsdbops.OwnerControllerKey.String()] != c.controllerName || + qos.ExternalIDs[libovsdbops.OwnerTypeKey.String()] != string(libovsdbops.NetworkQoSOwnerType) { + return false + } + objName := qos.ExternalIDs[libovsdbops.ObjectNameKey.String()] + // doesn't have a corresponding k8s name + if objName == "" { + klog.Warningf("OVN QoS %s doesn't have expected key %s", qos.UUID, libovsdbops.ObjectNameKey.String()) + return true + } + // clean up qoses whose k8s object is gone + if _, exists := nqosMap[objName]; !exists { + klog.Warningf("OVN QoS %s doesn't have expected NetworkQoS object %s", qos.UUID, objName) + return true + } + return false + }); err != nil { + klog.Errorf("Failed to get ops to clean up stale QoSes: %v", err) + } + + // delete address sets whose NetworkQoS object is gone in k8s + if err := libovsdbops.DeleteAddressSetsWithPredicate(c.nbClient, func(addrset *nbdb.AddressSet) bool { + if addrset.ExternalIDs[libovsdbops.OwnerControllerKey.String()] != c.controllerName || + addrset.ExternalIDs[libovsdbops.OwnerTypeKey.String()] != string(libovsdbops.NetworkQoSOwnerType) { + return false + } + objName := addrset.ExternalIDs[libovsdbops.ObjectNameKey.String()] + // doesn't have a corresponding k8s name + if objName == "" { + klog.Warningf("AddressSet %s doesn't have expected key %s", addrset.UUID, libovsdbops.ObjectNameKey.String()) + return true + } + // clean up address sets whose k8s object is gone + if _, exists := nqosMap[objName]; !exists { + klog.Warningf("AddressSet %s doesn't have expected NetworkQoS object %s", addrset.UUID, objName) + return true + } + return false + }); err != nil { + klog.Errorf("Failed to get ops to clean up stale address sets: %v", err) + } + + return nil +} diff --git a/go-controller/pkg/ovn/controller/network_qos/types.go b/go-controller/pkg/ovn/controller/network_qos/types.go new file mode 100644 index 0000000000..188d61e689 --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/types.go @@ -0,0 +1,388 @@ +package networkqos + +import ( + "fmt" + "slices" + "sort" + "strconv" + "strings" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + knet "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/labels" +
"k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + networkqosv1alpha1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" +) + +// networkQoSState is the cache that keeps the state of a single +// network qos in the cluster with namespace+name being unique +type networkQoSState struct { + sync.RWMutex + // name of the network qos + name string + namespace string + + SrcAddrSet addressset.AddressSet + Pods sync.Map // pods name -> ips in the srcAddrSet + SwitchRefs sync.Map // switch name -> list of source pods + PodSelector labels.Selector + + // egressRules stores the objects needed to track .Spec.Egress changes + EgressRules []*GressRule +} + +func (nqosState *networkQoSState) getObjectNameKey() string { + return joinMetaNamespaceAndName(nqosState.namespace, nqosState.name, ":") +} + +func (nqosState *networkQoSState) getDbObjectIDs(controller string, ruleIndex int) *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.NetworkQoS, controller, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: nqosState.getObjectNameKey(), + libovsdbops.RuleIndex: fmt.Sprintf("%d", ruleIndex), + }) +} + +func (nqosState *networkQoSState) emptyPodSelector() bool { + return nqosState.PodSelector == nil || nqosState.PodSelector.Empty() +} + +func (nqosState *networkQoSState) initAddressSets(addressSetFactory addressset.AddressSetFactory, controllerName string) error { + var err error + // init source address set + if nqosState.emptyPodSelector() { + nqosState.SrcAddrSet, err = getNamespaceAddressSet(addressSetFactory, controllerName, nqosState.namespace) + } else { + nqosState.SrcAddrSet, err = addressSetFactory.EnsureAddressSet(GetNetworkQoSAddrSetDbIDs(nqosState.namespace, nqosState.name, "src", "0", controllerName)) + } + if err != nil { + return fmt.Errorf("failed to init source address set for %s/%s: %w", nqosState.namespace, nqosState.name, err) + } + // ensure destination address sets + for ruleIndex, rule := range nqosState.EgressRules { + for destIndex, dest := range rule.Classifier.Destinations { + if dest.NamespaceSelector == nil && dest.PodSelector == nil { + continue + } + dest.DestAddrSet, err = addressSetFactory.EnsureAddressSet(GetNetworkQoSAddrSetDbIDs(nqosState.namespace, nqosState.name, strconv.Itoa(ruleIndex), strconv.Itoa(destIndex), controllerName)) + if err != nil { + return fmt.Errorf("failed to init destination address set for %s/%s: %w", nqosState.namespace, nqosState.name, err) + } + } + } + return nil +} + +func (nqosState *networkQoSState) matchSourceSelector(pod *corev1.Pod) bool { + if pod.Namespace != nqosState.namespace { + return false + } + if nqosState.PodSelector == nil { + return true + } + return nqosState.PodSelector.Matches(labels.Set(pod.Labels)) +} + +func (nqosState *networkQoSState) configureSourcePod(ctrl *Controller, pod *corev1.Pod, addresses []string) error { + fullPodName := joinMetaNamespaceAndName(pod.Namespace, pod.Name) + if nqosState.PodSelector != nil { + // if PodSelector is nil, use namespace's address set, so unnecessary to add ip here + if err := nqosState.SrcAddrSet.AddAddresses(addresses); err != nil { + return fmt.Errorf("failed to add addresses {%s} to address set %s for NetworkQoS %s/%s: %v", strings.Join(addresses, ","), 
nqosState.SrcAddrSet.GetName(), nqosState.namespace, nqosState.name, err) + } + nqosState.Pods.Store(fullPodName, addresses) + klog.V(4).Infof("Successfully added address (%s) of pod %s to address set %s", strings.Join(addresses, ","), fullPodName, nqosState.SrcAddrSet.GetName()) + } + // get switch name + switchName := ctrl.getLogicalSwitchName(pod.Spec.NodeName) + if switchName == "" { + return fmt.Errorf("failed to get logical switch name for node %s, topology %s", pod.Spec.NodeName, ctrl.TopologyType()) + } + + podList := []string{} + val, loaded := nqosState.SwitchRefs.Load(switchName) + if loaded { + podList = val.([]string) + } + + if !loaded { + klog.V(4).Infof("Adding NetworkQoS %s/%s to logical switch %s", nqosState.namespace, nqosState.name, switchName) + start := time.Now() + if err := ctrl.addQoSToLogicalSwitch(nqosState, switchName); err != nil { + return err + } + recordOvnOperationDuration("add", time.Since(start).Milliseconds()) + } + + podList = append(podList, fullPodName) + nqosState.SwitchRefs.Store(switchName, podList) + return nil +} + +func (nqosState *networkQoSState) removePodFromSource(ctrl *Controller, fullPodName string, addresses []string) error { + if len(addresses) == 0 { + // if no addresses are provided, try a lookup in the cache + if val, ok := nqosState.Pods.Load(fullPodName); ok { + addresses = val.([]string) + } + } + if len(addresses) > 0 && nqosState.PodSelector != nil { + // remove pod from the non-namespace-scoped source address set + if err := nqosState.SrcAddrSet.DeleteAddresses(addresses); err != nil { + return fmt.Errorf("failed to delete addresses (%s) from address set %s: %v", strings.Join(addresses, ","), nqosState.SrcAddrSet.GetName(), err) + } + } + nqosState.Pods.Delete(fullPodName) + return nqosState.removeZeroQoSNodes(ctrl, fullPodName) +} + +func (nqosState *networkQoSState) removeZeroQoSNodes(ctrl *Controller, fullPodName string) error { + zeroQoSSwitches := []string{} + // since the node is unknown when the pod is deleted, iterate the SwitchRefs to remove the pod + nqosState.SwitchRefs.Range(func(key, val any) bool { + switchName := key.(string) + podList := val.([]string) + podList = slices.DeleteFunc(podList, func(s string) bool { + return s == fullPodName + }) + if len(podList) == 0 { + zeroQoSSwitches = append(zeroQoSSwitches, switchName) + } else { + nqosState.SwitchRefs.Store(switchName, podList) + } + return true + }) + // unbind qos from L3 logical switches that no longer have any source pods + if len(zeroQoSSwitches) > 0 && ctrl.TopologyType() == types.Layer3Topology { + start := time.Now() + if err := ctrl.removeQoSFromLogicalSwitches(nqosState, zeroQoSSwitches); err != nil { + return err + } + recordOvnOperationDuration("remove", time.Since(start).Milliseconds()) + for _, lsw := range zeroQoSSwitches { + nqosState.SwitchRefs.Delete(lsw) + } + } + return nil +} + +func (nqosState *networkQoSState) getAddressSetHashNames() []string { + addrsetNames := []string{} + if nqosState.SrcAddrSet != nil { + v4Hash, v6Hash := nqosState.SrcAddrSet.GetASHashNames() + addrsetNames = append(addrsetNames, v4Hash, v6Hash) + } + for _, rule := range nqosState.EgressRules { + for _, dest := range rule.Classifier.Destinations { + if dest.DestAddrSet != nil { + v4Hash, v6Hash := dest.DestAddrSet.GetASHashNames() + addrsetNames = append(addrsetNames, v4Hash, v6Hash) + } + } + } + return addrsetNames +} + +func (nqosState *networkQoSState) cleanupStaleAddresses(addressSetMap map[string]sets.Set[string]) error { + if nqosState.SrcAddrSet != nil { + addresses :=
addressSetMap[nqosState.SrcAddrSet.GetName()] + v4Addresses, _ := nqosState.SrcAddrSet.GetAddresses() + staleAddresses := []string{} + for _, address := range v4Addresses { + if !addresses.Has(address) { + staleAddresses = append(staleAddresses, address) + } + } + if len(staleAddresses) > 0 { + if err := nqosState.SrcAddrSet.DeleteAddresses(staleAddresses); err != nil { + return err + } + } + } + for _, egress := range nqosState.EgressRules { + for _, dest := range egress.Classifier.Destinations { + if dest.DestAddrSet == nil { + continue + } + addresses := addressSetMap[dest.DestAddrSet.GetName()] + v4Addresses, _ := dest.DestAddrSet.GetAddresses() + staleAddresses := []string{} + for _, address := range v4Addresses { + if !addresses.Has(address) { + staleAddresses = append(staleAddresses, address) + } + } + if len(staleAddresses) > 0 { + if err := dest.DestAddrSet.DeleteAddresses(staleAddresses); err != nil { + return err + } + } + } + } + return nil +} + +type GressRule struct { + Priority int + Dscp int + Classifier *Classifier + + // bandwitdh + Rate *int + Burst *int +} + +type trafficDirection string + +const ( + trafficDirSource trafficDirection = "src" + trafficDirDest trafficDirection = "dst" +) + +type Classifier struct { + Destinations []*Destination + Ports []*networkqosv1alpha1.Port +} + +// ToQosMatchString generates dest and protocol/port part of QoS match string, based on +// Classifier's destinations, protocol and port fields, example: +// (ip4.dst == $addr_set_name || (ip4.dst == 128.116.0.0/17 && ip4.dst != {128.116.0.0,128.116.0.255})) && tcp && tcp.dst == 8080 +// Multiple destinations will be connected by "||". +// See https://github.com/ovn-org/ovn/blob/2bdf1129c19d5bd2cd58a3ddcb6e2e7254b05054/ovn-nb.xml#L2942-L3025 for details +func (c *Classifier) ToQosMatchString(ipv4Enabled, ipv6Enabled bool) string { + if c == nil { + return "" + } + destMatchStrings := []string{} + for _, dest := range c.Destinations { + match := "ip4.dst == 0.0.0.0/0 || ip6.dst == ::/0" + if dest.DestAddrSet != nil { + match = addressSetToMatchString(dest.DestAddrSet, trafficDirDest, ipv4Enabled, ipv6Enabled) + } else if dest.IpBlock != nil && dest.IpBlock.CIDR != "" { + ipVersion := "ip4" + if utilnet.IsIPv6CIDRString(dest.IpBlock.CIDR) { + ipVersion = "ip6" + } + if len(dest.IpBlock.Except) == 0 { + match = fmt.Sprintf("%s.%s == %s", ipVersion, trafficDirDest, dest.IpBlock.CIDR) + } else { + match = fmt.Sprintf("%s.%s == %s && %s.%s != {%s}", ipVersion, trafficDirDest, dest.IpBlock.CIDR, ipVersion, trafficDirDest, strings.Join(dest.IpBlock.Except, ",")) + } + } + destMatchStrings = append(destMatchStrings, match) + } + + output := "" + if len(destMatchStrings) == 1 { + output = destMatchStrings[0] + } else { + for index, str := range destMatchStrings { + if index > 0 { + output += " || " + } + if strings.Contains(str, "||") || strings.Contains(str, "&&") { + output = output + fmt.Sprintf("(%s)", str) + } else { + output = output + str + } + } + } + if strings.Contains(output, "||") { + output = fmt.Sprintf("(%s)", output) + } + protoPortMap := map[string][]string{} + for _, port := range c.Ports { + if port.Protocol == "" { + continue + } + protocol := strings.ToLower(port.Protocol) + ports := protoPortMap[protocol] + if ports == nil { + ports = []string{} + } + if port.Port != nil { + ports = append(ports, fmt.Sprintf("%d", *port.Port)) + } + protoPortMap[protocol] = ports + } + + sortedProtocols := make([]string, 0, len(protoPortMap)) + for protocol := range protoPortMap { + 
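+ // collect the protocol keys here; they are sorted below so the generated match string is deterministic,
+ // e.g. tcp:{8080,8081} and udp:{9090,8080} render as ((tcp && tcp.dst == {8080,8081}) || (udp && udp.dst == {9090,8080}))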
sortedProtocols = append(sortedProtocols, protocol) + } + sort.Strings(sortedProtocols) + + portMatches := []string{} + for _, protocol := range sortedProtocols { + ports := protoPortMap[protocol] + match := protocol + if len(ports) == 1 { + match = fmt.Sprintf("%s && %s.dst == %s", protocol, protocol, ports[0]) + } else if len(ports) > 1 { + match = fmt.Sprintf("%s && %s.dst == {%s}", protocol, protocol, strings.Join(ports, ",")) + } + portMatches = append(portMatches, match) + } + if len(portMatches) == 1 { + output = fmt.Sprintf("%s && %s", output, portMatches[0]) + } else if len(portMatches) > 1 { + output = fmt.Sprintf("%s && ((%s))", output, strings.Join(portMatches, ") || (")) + } + return output +} + +type Destination struct { + IpBlock *knet.IPBlock + + DestAddrSet addressset.AddressSet + PodSelector labels.Selector + Pods sync.Map // pods name -> ips in the destAddrSet + NamespaceSelector labels.Selector +} + +func (dest *Destination) matchPod(podNs *corev1.Namespace, pod *corev1.Pod, qosNamespace string) bool { + switch { + case dest.NamespaceSelector != nil && dest.PodSelector != nil: + return dest.NamespaceSelector.Matches(labels.Set(podNs.Labels)) && dest.PodSelector.Matches(labels.Set(pod.Labels)) + case dest.NamespaceSelector == nil && dest.PodSelector != nil: + return pod.Namespace == qosNamespace && dest.PodSelector.Matches(labels.Set(pod.Labels)) + case dest.NamespaceSelector != nil && dest.PodSelector == nil: + return dest.NamespaceSelector.Matches(labels.Set(podNs.Labels)) + default: //dest.NamespaceSelector == nil && dest.PodSelector == nil: + return false + } +} + +func (dest *Destination) addPod(podNamespace, podName string, addresses []string) error { + if err := dest.DestAddrSet.AddAddresses(addresses); err != nil { + return err + } + // add pod to map + dest.Pods.Store(joinMetaNamespaceAndName(podNamespace, podName), addresses) + return nil +} + +func (dest *Destination) removePod(fullPodName string, addresses []string) error { + if len(addresses) == 0 { + val, ok := dest.Pods.Load(fullPodName) + if ok && val != nil { + addresses = val.([]string) + } + } + if err := dest.DestAddrSet.DeleteAddresses(addresses); err != nil { + return fmt.Errorf("failed to remove addresses (%s): %v", strings.Join(addresses, ","), err) + } + dest.Pods.Delete(fullPodName) + return nil +} + +func getQoSRulePriority(qosPriority, ruleIndex int) int { + return 10000 + qosPriority*10 + ruleIndex +} diff --git a/go-controller/pkg/ovn/controller/network_qos/utils.go b/go-controller/pkg/ovn/controller/network_qos/utils.go new file mode 100644 index 0000000000..7d58a7ad3e --- /dev/null +++ b/go-controller/pkg/ovn/controller/network_qos/utils.go @@ -0,0 +1,84 @@ +package networkqos + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + + libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + ovnkutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +func joinMetaNamespaceAndName(namespace, name string, separator ...string) string { + if namespace == "" { + return name + } + sep := "/" + if len(separator) > 0 { + sep = separator[0] + } + return namespace + sep + name +} + +func GetNetworkQoSAddrSetDbIDs(nqosNamespace, nqosName, ruleIndex, ipBlockIndex, controller string) *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNetworkQoS, controller, + map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: 
joinMetaNamespaceAndName(nqosNamespace, nqosName, ":"), + // rule index is the unique id for address set within given objectName + libovsdbops.RuleIndex: ruleIndex, + libovsdbops.IpBlockIndexKey: ipBlockIndex, + }) +} + +func getPodAddresses(pod *corev1.Pod, networkInfo ovnkutil.NetInfo) ([]string, error) { + // check annotation "k8s.ovn.org/pod-networks" before calling GetPodIPsOfNetwork, + // as it's no easy to check if the error is caused by missing annotation, while + // we don't want to return error for such case as it will trigger retry + _, ok := pod.Annotations[ovnkutil.OvnPodAnnotationName] + if !ok { + // pod hasn't been annotated yet, return nil to avoid retry + return nil, nil + } + ips, err := ovnkutil.GetPodIPsOfNetwork(pod, networkInfo) + if err != nil { + return nil, err + } + addresses := []string{} + for _, ip := range ips { + addresses = append(addresses, ip.String()) + } + return addresses, nil +} + +func generateNetworkQoSMatch(qosState *networkQoSState, rule *GressRule, ipv4Enabled, ipv6Enabled bool) string { + match := addressSetToMatchString(qosState.SrcAddrSet, trafficDirSource, ipv4Enabled, ipv6Enabled) + + classiferMatchString := rule.Classifier.ToQosMatchString(ipv4Enabled, ipv6Enabled) + if classiferMatchString != "" { + match = match + " && " + classiferMatchString + } + + return match +} + +func addressSetToMatchString(addrset addressset.AddressSet, dir trafficDirection, ipv4Enabled, ipv6Enabled bool) string { + ipv4AddrSetHashName, ipv6AddrSetHashName := addrset.GetASHashNames() + output := "" + switch { + case ipv4Enabled && ipv6Enabled: + output = fmt.Sprintf("(ip4.%s == {$%s} || ip6.%s == {$%s})", dir, ipv4AddrSetHashName, dir, ipv6AddrSetHashName) + case ipv4Enabled: + output = fmt.Sprintf("ip4.%s == {$%s}", dir, ipv4AddrSetHashName) + case ipv6Enabled: + output = fmt.Sprintf("ip6.%s == {$%s}", dir, ipv6AddrSetHashName) + } + return output +} + +func getNamespaceAddressSet(addressSetFactory addressset.AddressSetFactory, controllerName, namespace string) (addressset.AddressSet, error) { + dbIDs := libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNamespace, controllerName, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: namespace, + }) + return addressSetFactory.EnsureAddressSet(dbIDs) +} diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 705a4da468..cf6886e846 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -31,11 +31,8 @@ import ( svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/unidling" dnsnameresolver "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/dns_name_resolver" - aclsyncer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/acl" - addrsetsyncer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/address_set" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/logical_router_policy" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/nat" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/external_ids_syncer/port_group" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/routeimport" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/topology" @@ -296,35 +293,7 
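For a dual-stack cluster, a sketch of how the source and classifier parts are combined here (the address-set hash names below are made up):

    // addressSetToMatchString(srcAddrSet, trafficDirSource, true, true) gives:
    //   (ip4.src == {$a13607449821398607916} || ip6.src == {$a16813291897725094015})
    // generateNetworkQoSMatch appends the classifier match with " && ":
    //   (ip4.src == {$a13607449821398607916} || ip6.src == {$a16813291897725094015}) && tcp && tcp.dst == 8080

Note that getPodAddresses intentionally returns nil, nil for a pod that does not yet carry the k8s.ovn.org/pod-networks annotation, so such pods are skipped without triggering a retry.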
@@ func (oc *DefaultNetworkController) newRetryFramework( } func (oc *DefaultNetworkController) syncDb() error { - // sync address sets and ACLs, only required for network controller, since any old objects in the db without - // Owner set are owned by the default network controller. - // The order of syncs is important, since the next syncer may rely on the data updated by the previous one. - addrSetSyncer := addrsetsyncer.NewAddressSetSyncer(oc.nbClient, oc.controllerName) - err := addrSetSyncer.SyncAddressSets() - if err != nil { - return fmt.Errorf("failed to sync address sets on controller init: %v", err) - } - - existingNodes, err := oc.kube.GetNodes() - if err != nil { - return fmt.Errorf("failed to get existing nodes: %w", err) - } - aclSyncer := aclsyncer.NewACLSyncer(oc.nbClient, oc.controllerName) - err = aclSyncer.SyncACLs(existingNodes) - if err != nil { - return fmt.Errorf("failed to sync acls on controller init: %v", err) - } - - // port groups should be synced only once across all controllers (as port groups were used by secondary network - // controllers before dbIDs, but SyncPortGroups knows how to get this info from the old ExternalIDs, that is also - // why it doesn't have controllerName as an argument). - // Do it here since DefaultNetworkController is always created, and this sync has dependencies with the other syncs - // in this function. It uses acl.ExternalIDs[libovsdbops.ObjectNameKey.String()] to fetch namespace name for a - // referenced port group (thus, SyncACLs should be called before). - portGroupSyncer := port_group.NewPortGroupSyncer(oc.nbClient) - if err = portGroupSyncer.SyncPortGroups(); err != nil { - return fmt.Errorf("failed to sync port groups on controller init: %v", err) - } + var err error // sync shared resources // pod selector address sets err = oc.cleanupPodSelectorAddressSets() @@ -574,6 +543,19 @@ func (oc *DefaultNetworkController) run(_ context.Context) error { } } + if config.OVNKubernetesFeature.EnableNetworkQoS { + err := oc.newNetworkQoSController() + if err != nil { + return fmt.Errorf("unable to create network qos controller, err: %w", err) + } + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + // Until we have scale issues in future let's spawn only one thread + oc.nqosController.Run(1, oc.stopChan) + }() + } + end := time.Since(start) klog.Infof("Completing all the Watchers took %v", end) metrics.MetricOVNKubeControllerSyncDuration.WithLabelValues("all watchers").Set(end.Seconds()) @@ -943,6 +925,8 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int newNodeIsLocalZoneNode := h.oc.isLocalZoneNode(newNode) zoneClusterChanged := h.oc.nodeZoneClusterChanged(oldNode, newNode, newNodeIsLocalZoneNode, types.DefaultNetworkName) nodeSubnetChange := nodeSubnetChanged(oldNode, newNode, types.DefaultNetworkName) + nodeEncapIPsChanged := util.NodeEncapIPsChanged(oldNode, newNode) + var aggregatedErrors []error if newNodeIsLocalZoneNode { var nodeSyncsParam *nodeSyncs @@ -989,7 +973,8 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int // Check if the node moved from local zone to remote zone and if so syncZoneIC should be set to true. 
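The NetworkQoS controller is only constructed when EnableNetworkQoS is set, runs with a single worker (the in-line comment leaves room to raise this if scale requires it), and is tied to the existing stop channel and wait group. A sketch of the shutdown sequence this wiring assumes (names as used above; the exact teardown path is not shown in this hunk):

    // Stopping the default network controller:
    close(oc.stopChan) // nqosController.Run(1, oc.stopChan) returns
    oc.wg.Wait()       // the deferred wg.Done() lets Wait() unblock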
// Also check if node subnet changed, so static routes are properly set // Also check if the node is used to be a hybrid overlay node - syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) || nodeSubnetChange || zoneClusterChanged || primaryAddrChanged(oldNode, newNode) || switchToOvnNode + syncZoneIC = syncZoneIC || h.oc.isLocalZoneNode(oldNode) || nodeSubnetChange || zoneClusterChanged || + switchToOvnNode || nodeEncapIPsChanged if syncZoneIC { klog.Infof("Node %s in remote zone %s needs interconnect zone sync up. Zone cluster changed: %v", newNode.Name, util.GetNodeZone(newNode), zoneClusterChanged) diff --git a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_suite_test.go b/go-controller/pkg/ovn/external_ids_syncer/acl/acl_suite_test.go deleted file mode 100644 index 3d4223369a..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_suite_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package acl_test - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -func TestAcl(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "Acl Suite") -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go b/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go deleted file mode 100644 index 6e6a734b71..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go +++ /dev/null @@ -1,499 +0,0 @@ -package acl - -import ( - "fmt" - "sort" - "strconv" - "strings" - "time" - - corev1 "k8s.io/api/core/v1" - knet "k8s.io/api/networking/v1" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" - - libovsdbclient "github.com/ovn-org/libovsdb/client" - "github.com/ovn-org/libovsdb/ovsdb" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/batching" -) - -const ( - defaultDenyPolicyTypeACLExtIdKey = "default-deny-policy-type" - mcastDefaultDenyID = "DefaultDeny" - mcastAllowInterNodeID = "AllowInterNode" - l4MatchACLExtIdKey = "l4Match" - ipBlockCIDRACLExtIdKey = "ipblock_cidr" - namespaceACLExtIdKey = "namespace" - policyACLExtIdKey = "policy" - policyTypeACLExtIdKey = "policy_type" - policyTypeNumACLExtIdKey = "%s_num" - emptyIdx = -1 - defaultDenyACL = "defaultDeny" - arpAllowACL = "arpAllow" - // staleArpAllowPolicyMatch "was" the old match used when creating default allow ARP ACLs for a namespace - // NOTE: This is succeed by arpAllowPolicyMatch to allow support for IPV6. This is currently only - // used when removing stale ACLs from the syncNetworkPolicy function and should NOT be used in any main logic. - staleArpAllowPolicyMatch = "arp" - egressFirewallACLExtIdKey = "egressFirewall" -) - -type ACLSyncer struct { - nbClient libovsdbclient.Client - controllerName string - // txnBatchSize is used to control how many acls will be updated with 1 db transaction. 
- txnBatchSize int -} - -// controllerName is the name of the new controller that should own all acls without controller -func NewACLSyncer(nbClient libovsdbclient.Client, controllerName string) *ACLSyncer { - return &ACLSyncer{ - nbClient: nbClient, - controllerName: controllerName, - // create time (which is the upper bound of how much time an update can take) for 20K ACLs - // (gress ACL were used for testing as the ones that have the biggest number of ExternalIDs) - // is ~4 sec, which is safe enough to not exceed 10 sec transaction timeout. - txnBatchSize: 20000, - } -} - -func (syncer *ACLSyncer) SyncACLs(existingNodes []*corev1.Node) error { - // stale acls don't have controller ID - legacyAclPred := libovsdbops.GetNoOwnerPredicate[*nbdb.ACL]() - legacyACLs, err := libovsdbops.FindACLsWithPredicate(syncer.nbClient, legacyAclPred) - if err != nil { - return fmt.Errorf("unable to find stale ACLs, cannot update stale data: %v", err) - } - - if len(legacyACLs) > 0 { - var updatedACLs []*nbdb.ACL - multicastACLs := syncer.updateStaleMulticastACLsDbIDs(legacyACLs) - klog.Infof("Found %d stale multicast ACLs", len(multicastACLs)) - updatedACLs = append(updatedACLs, multicastACLs...) - - allowFromNodeACLs := syncer.updateStaleNetpolNodeACLs(legacyACLs, existingNodes) - klog.Infof("Found %d stale allow from node ACLs", len(allowFromNodeACLs)) - updatedACLs = append(updatedACLs, allowFromNodeACLs...) - - gressPolicyACLs, err := syncer.updateStaleGressPolicies(legacyACLs) - if err != nil { - return fmt.Errorf("failed to update gress policy ACLs: %w", err) - } - klog.Infof("Found %d stale gress ACLs", len(gressPolicyACLs)) - updatedACLs = append(updatedACLs, gressPolicyACLs...) - - defaultDenyACLs, deleteACLs, err := syncer.updateStaleDefaultDenyNetpolACLs(legacyACLs) - if err != nil { - return fmt.Errorf("failed to update stale default deny netpol ACLs: %w", err) - } - klog.Infof("Found %d stale default deny netpol ACLs", len(defaultDenyACLs)) - updatedACLs = append(updatedACLs, defaultDenyACLs...) - - egressFirewallACLs := syncer.updateStaleEgressFirewallACLs(legacyACLs) - klog.Infof("Found %d stale egress firewall ACLs", len(gressPolicyACLs)) - updatedACLs = append(updatedACLs, egressFirewallACLs...) - - // delete stale duplicating acls first - _, err = libovsdbops.TransactAndCheck(syncer.nbClient, deleteACLs) - if err != nil { - return fmt.Errorf("faile to trasact db ops: %v", err) - } - - // make sure there is only 1 ACL with any given primary ID - // 1. collect all existing primary IDs via predicate that will update IDs set, but always return false - existingACLPrimaryIDs := sets.Set[string]{} - _, err = libovsdbops.FindACLsWithPredicate(syncer.nbClient, func(acl *nbdb.ACL) bool { - if acl.ExternalIDs[libovsdbops.PrimaryIDKey.String()] != "" { - existingACLPrimaryIDs.Insert(acl.ExternalIDs[libovsdbops.PrimaryIDKey.String()]) - } - return false - }) - if err != nil { - return fmt.Errorf("failed to find exisitng primary ID acls: %w", err) - } - // 2. 
Check to-be-updated ACLs don't have the same PrimaryID between themselves and with the existingACLPrimaryIDs - uniquePrimaryIDACLs := []*nbdb.ACL{} - for _, acl := range updatedACLs { - primaryID := acl.ExternalIDs[libovsdbops.PrimaryIDKey.String()] - if existingACLPrimaryIDs.Has(primaryID) { - // don't update that acl, otherwise 2 ACLs with the same primary ID will be in the db - klog.Warningf("Skip updating ACL %+v to the new ExternalIDs, since there is another ACL with the same primary ID", acl) - } else { - existingACLPrimaryIDs.Insert(primaryID) - uniquePrimaryIDACLs = append(uniquePrimaryIDACLs, acl) - } - } - - // update acls with new ExternalIDs - err = batching.Batch[*nbdb.ACL](syncer.txnBatchSize, uniquePrimaryIDACLs, func(batchACLs []*nbdb.ACL) error { - return libovsdbops.CreateOrUpdateACLs(syncer.nbClient, nil, batchACLs...) - }) - if err != nil { - return fmt.Errorf("cannot update stale ACLs: %v", err) - } - - // There may be very old acls that are not selected by any of the syncers, delete them. - // One example is stale multicast ACLs with the old priority that was accidentally changed by - // https://github.com/ovn-org/ovn-kubernetes/commit/f68d302664e64093c867c0b9efe08d1d757d6780#diff-cc83e19af1c257d5a09b711d5977d8f8c20beb34b7b5d3eb37b2f2c53ded1bf7L537-R462 - leftoverACLs, err := libovsdbops.FindACLsWithPredicate(syncer.nbClient, legacyAclPred) - if err != nil { - return fmt.Errorf("unable to find leftover ACLs, cannot update stale data: %v", err) - } - p := func(_ *nbdb.LogicalSwitch) bool { return true } - err = libovsdbops.RemoveACLsFromLogicalSwitchesWithPredicate(syncer.nbClient, p, leftoverACLs...) - if err != nil { - return fmt.Errorf("unable delete leftover ACLs from switches: %v", err) - } - err = libovsdbops.DeleteACLsFromAllPortGroups(syncer.nbClient, leftoverACLs...) - if err != nil { - return fmt.Errorf("unable delete leftover ACLs from port groups: %v", err) - } - } - - // Once all the staleACLs are deleted and the externalIDs have been updated (externalIDs update should be a one-time - // upgrade operation), let us now update the tier's of all existing ACLs to types.DefaultACLTier. During upgrades after - // the OVN schema changes are applied, the nbdb.ACL.Tier column will be added and every row will be updated to 0 by - // default (types.PrimaryACLTier). For all features using ACLs (egressFirewall, NetworkPolicy, NodeACLs) we want to - // move them to Tier2. We need to do this in reverse order of ACL priority to avoid network traffic disruption during - // upgrades window (if not done according to priorities we might end up in a window where the ACL with priority 1000 - // for default deny is in tier0 while 1001 ACL for allow-ing traffic is in tier2 for a given namespace network policy). - // NOTE: This is a one-time operation as no ACLs should ever be created in types.PrimaryACLTier moving forward. - // Fetch all ACLs in types.PrimaryACLTier (Tier0); update their Tier to 2 and batch the ACL update. 
- klog.Info("Updating Tier of existing ACLs...") - start := time.Now() - aclPred := func(item *nbdb.ACL) bool { - return item.Tier == types.PrimaryACLTier - } - aclsInTier0, err := libovsdbops.FindACLsWithPredicate(syncer.nbClient, aclPred) - if err != nil { - return fmt.Errorf("unable to fetch Tier0 ACLs: %v", err) - } - if len(aclsInTier0) > 0 { - sort.Slice(aclsInTier0, func(i, j int) bool { - return aclsInTier0[i].Priority < aclsInTier0[j].Priority - }) // O(nlogn); unstable sort - for _, acl := range aclsInTier0 { - acl := acl - acl.Tier = types.DefaultACLTier // move tier to 2 - } - // batch ACLs together in order of their priority: lowest first and then highest - err = batching.Batch[*nbdb.ACL](syncer.txnBatchSize, aclsInTier0, func(batchACLs []*nbdb.ACL) error { - return libovsdbops.CreateOrUpdateACLs(syncer.nbClient, nil, batchACLs...) - }) - if err != nil { - return fmt.Errorf("cannot update ACLs to tier2: %v", err) - } - } - klog.Infof("Updating tier's of all ACLs in cluster took %v", time.Since(start)) - return nil -} - -func (syncer *ACLSyncer) getDefaultMcastACLDbIDs(mcastType, policyDirection string) *libovsdbops.DbObjectIDs { - // there are 2 types of default multicast ACLs in every direction (Ingress/Egress) - // DefaultDeny = deny multicast by default - // AllowInterNode = allow inter-node multicast - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLMulticastCluster, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.TypeKey: mcastType, - libovsdbops.PolicyDirectionKey: policyDirection, - }) - -} - -func (syncer *ACLSyncer) getNamespaceMcastACLDbIDs(ns, policyDirection string) *libovsdbops.DbObjectIDs { - // namespaces ACL - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLMulticastNamespace, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: ns, - libovsdbops.PolicyDirectionKey: policyDirection, - }) -} - -// updateStaleMulticastACLsDbIDs updates multicast ACLs that don't have new ExternalIDs set. -// Must be run before WatchNamespace, since namespaceSync function uses syncNsMulticast, which relies on the new IDs. 
-func (syncer *ACLSyncer) updateStaleMulticastACLsDbIDs(legacyACLs []*nbdb.ACL) []*nbdb.ACL { - updatedACLs := []*nbdb.ACL{} - for _, acl := range legacyACLs { - var dbIDs *libovsdbops.DbObjectIDs - if acl.Priority == types.DefaultMcastDenyPriority { - // there is only 1 type acl type with this priority: default deny - dbIDs = syncer.getDefaultMcastACLDbIDs(mcastDefaultDenyID, acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey]) - } else if acl.Priority == types.DefaultMcastAllowPriority { - // there are 4 multicast allow types - if acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] == string(knet.PolicyTypeIngress) { - // ingress allow acl - // either default of namespaced - if strings.Contains(acl.Match, types.ClusterRtrPortGroupNameBase) { - // default allow ingress - dbIDs = syncer.getDefaultMcastACLDbIDs(mcastAllowInterNodeID, string(knet.PolicyTypeIngress)) - } else { - // namespace allow ingress - // acl Name can be truncated (max length 64), but k8s namespace is limited to 63 symbols, - // therefore it is safe to extract it from the name - ns := strings.Split(libovsdbops.GetACLName(acl), "_")[0] - dbIDs = syncer.getNamespaceMcastACLDbIDs(ns, string(knet.PolicyTypeIngress)) - } - } else if acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] == string(knet.PolicyTypeEgress) { - // egress allow acl - // either default of namespaced - if strings.Contains(acl.Match, types.ClusterRtrPortGroupNameBase) { - // default allow egress - dbIDs = syncer.getDefaultMcastACLDbIDs(mcastAllowInterNodeID, string(knet.PolicyTypeEgress)) - } else { - // namespace allow egress - // acl Name can be truncated (max length 64), but k8s namespace is limited to 63 symbols, - // therefore it is safe to extract it from the name - ns := strings.Split(libovsdbops.GetACLName(acl), "_")[0] - dbIDs = syncer.getNamespaceMcastACLDbIDs(ns, string(knet.PolicyTypeEgress)) - } - } else { - // unexpected, acl with multicast priority should have ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] set - klog.Warningf("Found stale ACL with multicast priority %d, but without expected ExternalID[%s]: %+v", - acl.Priority, defaultDenyPolicyTypeACLExtIdKey, acl) - continue - } - } else { - //non-multicast acl - continue - } - // update externalIDs - acl.ExternalIDs = dbIDs.GetExternalIDs() - updatedACLs = append(updatedACLs, acl) - } - return updatedACLs -} - -func (syncer *ACLSyncer) getAllowFromNodeACLDbIDs(nodeName, mgmtPortIP string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLNetpolNode, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: nodeName, - libovsdbops.IpKey: mgmtPortIP, - }) -} - -// updateStaleNetpolNodeACLs updates allow from node ACLs, that don't have new ExternalIDs based -// on DbObjectIDs set. Allow from node acls are applied on the node switch, therefore the cleanup for deleted is not needed, -// since acl will be deleted toegther with the node switch. 
-func (syncer *ACLSyncer) updateStaleNetpolNodeACLs(legacyACLs []*nbdb.ACL, existingNodes []*corev1.Node) []*nbdb.ACL { - // ACL to allow traffic from host via management port has no name or ExternalIDs - // The only way to find it is by exact match - type aclInfo struct { - nodeName string - ip string - } - matchToNode := map[string]aclInfo{} - for _, node := range existingNodes { - node := *node - hostSubnets, err := util.ParseNodeHostSubnetAnnotation(&node, types.DefaultNetworkName) - if err != nil { - klog.Warningf("Couldn't parse hostSubnet annotation for node %s: %v", node.Name, err) - continue - } - for _, hostSubnet := range hostSubnets { - mgmtIfAddr := util.GetNodeManagementIfAddr(hostSubnet) - ipFamily := "ip4" - if utilnet.IsIPv6(mgmtIfAddr.IP) { - ipFamily = "ip6" - } - match := fmt.Sprintf("%s.src==%s", ipFamily, mgmtIfAddr.IP.String()) - matchToNode[match] = aclInfo{ - nodeName: node.Name, - ip: mgmtIfAddr.IP.String(), - } - } - } - updatedACLs := []*nbdb.ACL{} - for _, acl := range legacyACLs { - if _, ok := matchToNode[acl.Match]; ok { - aclInfo := matchToNode[acl.Match] - dbIDs := syncer.getAllowFromNodeACLDbIDs(aclInfo.nodeName, aclInfo.ip) - // Update ExternalIDs and Name based on new dbIndex - acl.ExternalIDs = dbIDs.GetExternalIDs() - updatedACLs = append(updatedACLs, acl) - } - } - return updatedACLs -} - -func (syncer *ACLSyncer) getNetpolGressACLDbIDs(policyNamespace, policyName, policyType string, - gressIdx, portPolicyIdx, ipBlockIdx int) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLNetworkPolicyPortIndex, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - // policy namespace+name - libovsdbops.ObjectNameKey: policyNamespace + ":" + policyName, - // egress or ingress - libovsdbops.PolicyDirectionKey: policyType, - // gress rule index - libovsdbops.GressIdxKey: strconv.Itoa(gressIdx), - // acls are created for every gp.portPolicies: - // - for empty policy (no selectors and no ip blocks) - empty ACL - // OR - // - all selector-based peers ACL - // - for every IPBlock +1 ACL - // Therefore unique id for given gressPolicy is portPolicy idx + IPBlock idx - // (empty policy and all selector-based peers ACLs will have idx=-1) - libovsdbops.PortPolicyIndexKey: strconv.Itoa(portPolicyIdx), - libovsdbops.IpBlockIndexKey: strconv.Itoa(ipBlockIdx), - }) -} - -func (syncer *ACLSyncer) updateStaleGressPolicies(legacyACLs []*nbdb.ACL) (updatedACLs []*nbdb.ACL, err error) { - if len(legacyACLs) == 0 { - return - } - // for every gress policy build mapping to count port policies. - // l4MatchACLExtIdKey was previously assigned based on port policy, - // we can just assign idx to every port Policy and make sure there are no equal ACLs. - gressPolicyPortCount := map[string]map[string]int{} - for _, acl := range legacyACLs { - if acl.ExternalIDs[policyTypeACLExtIdKey] == "" { - // not gress ACL - continue - } - policyNamespace := acl.ExternalIDs[namespaceACLExtIdKey] - policyName := acl.ExternalIDs[policyACLExtIdKey] - policyType := acl.ExternalIDs[policyTypeACLExtIdKey] - idxKey := fmt.Sprintf(policyTypeNumACLExtIdKey, policyType) - idx, err := strconv.Atoi(acl.ExternalIDs[idxKey]) - if err != nil { - return nil, fmt.Errorf("unable to parse gress policy idx %s: %v", - acl.ExternalIDs[idxKey], err) - } - var ipBlockIdx int - // ipBlockCIDRACLExtIdKey is "false" for non-ipBlock ACLs. 
- // Then for the first ipBlock in a given gress policy it is "true", - // and for the rest of them is idx+1 - if acl.ExternalIDs[ipBlockCIDRACLExtIdKey] == "true" { - ipBlockIdx = 0 - } else if acl.ExternalIDs[ipBlockCIDRACLExtIdKey] == "false" { - ipBlockIdx = emptyIdx - } else { - ipBlockIdx, err = strconv.Atoi(acl.ExternalIDs[ipBlockCIDRACLExtIdKey]) - if err != nil { - return nil, fmt.Errorf("unable to parse gress policy ipBlockCIDRACLExtIdKey %s: %v", - acl.ExternalIDs[ipBlockCIDRACLExtIdKey], err) - } - ipBlockIdx -= 1 - } - gressACLID := strings.Join([]string{policyNamespace, policyName, policyType, fmt.Sprintf("%d", idx)}, "_") - if gressPolicyPortCount[gressACLID] == nil { - gressPolicyPortCount[gressACLID] = map[string]int{} - } - var portIdx int - l4Match := acl.ExternalIDs[l4MatchACLExtIdKey] - if l4Match == libovsdbutil.UnspecifiedL4Match { - portIdx = emptyIdx - } else { - if _, ok := gressPolicyPortCount[gressACLID][l4Match]; !ok { - // this l4MatchACLExtIdKey is new for given gressPolicy, assign the next idx to it - gressPolicyPortCount[gressACLID][l4Match] = len(gressPolicyPortCount[gressACLID]) - } - portIdx = gressPolicyPortCount[gressACLID][l4Match] - } - dbIDs := syncer.getNetpolGressACLDbIDs(policyNamespace, policyName, - policyType, idx, portIdx, ipBlockIdx) - acl.ExternalIDs = dbIDs.GetExternalIDs() - updatedACLs = append(updatedACLs, acl) - } - return -} - -func (syncer *ACLSyncer) getDefaultDenyPolicyACLIDs(ns, policyType, defaultACLType string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLNetpolNamespace, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: ns, - // in the same namespace there can be 2 default deny port groups, egress and ingress, - // every port group has default deny and arp allow acl. - libovsdbops.PolicyDirectionKey: policyType, - libovsdbops.TypeKey: defaultACLType, - }) -} - -func (syncer *ACLSyncer) updateStaleDefaultDenyNetpolACLs(legacyACLs []*nbdb.ACL) (updatedACLs []*nbdb.ACL, - deleteOps []ovsdb.Operation, err error) { - for _, acl := range legacyACLs { - // sync default Deny policies - // defaultDenyPolicyTypeACLExtIdKey ExternalID was used by default deny and multicast acls, - // but multicast acls have specific DefaultMcast priority, filter them out. 
- if acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] == "" || (acl.Priority != types.DefaultAllowPriority && - acl.Priority != types.DefaultDenyPriority) { - // not default deny policy - continue - } - - // remove stale egress and ingress allow arp ACLs that were leftover as a result - // of ACL migration for "ARPallowPolicy" when the match changed from "arp" to "(arp || nd)" - if strings.Contains(acl.Match, " && "+staleArpAllowPolicyMatch) { - pgName := "" - if strings.Contains(acl.Match, "inport") { - // egress default ARP allow policy ("inport == @a16323395479447859119_egressDefaultDeny && arp") - pgName = strings.TrimPrefix(acl.Match, "inport == @") - } else if strings.Contains(acl.Match, "outport") { - // ingress default ARP allow policy ("outport == @a16323395479447859119_ingressDefaultDeny && arp") - pgName = strings.TrimPrefix(acl.Match, "outport == @") - } - pgName = strings.TrimSuffix(pgName, " && "+staleArpAllowPolicyMatch) - deleteOps, err = libovsdbops.DeleteACLsFromPortGroupOps(syncer.nbClient, deleteOps, pgName, acl) - if err != nil { - err = fmt.Errorf("failed getting delete acl ops: %w", err) - return - } - // acl will be deleted, no need to update it - continue - } - - // acl Name can be truncated (max length 64), but k8s namespace is limited to 63 symbols, - // therefore it is safe to extract it from the name. - // works for both older name _ and newer - // _egressDefaultDeny OR _ingressDefaultDeny - ns := strings.Split(libovsdbops.GetACLName(acl), "_")[0] - - // distinguish ARPAllowACL from DefaultDeny - var defaultDenyACLType string - if strings.Contains(acl.Match, "(arp || nd)") { - defaultDenyACLType = arpAllowACL - } else { - defaultDenyACLType = defaultDenyACL - } - dbIDs := syncer.getDefaultDenyPolicyACLIDs(ns, acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey], defaultDenyACLType) - acl.ExternalIDs = dbIDs.GetExternalIDs() - updatedACLs = append(updatedACLs, acl) - } - return -} - -func (syncer *ACLSyncer) getEgressFirewallACLDbIDs(namespace string, ruleIdx int) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.ACLEgressFirewall, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: namespace, - libovsdbops.RuleIndex: strconv.Itoa(ruleIdx), - }) -} - -func (syncer *ACLSyncer) updateStaleEgressFirewallACLs(legacyACLs []*nbdb.ACL) []*nbdb.ACL { - updatedACLs := []*nbdb.ACL{} - for _, acl := range legacyACLs { - if acl.Priority < types.MinimumReservedEgressFirewallPriority || acl.Priority > types.EgressFirewallStartPriority { - // not egress firewall acl - continue - } - namespace, ok := acl.ExternalIDs[egressFirewallACLExtIdKey] - if !ok || namespace == "" { - klog.Errorf("Failed to sync stale egress firewall acl: expected non-empty %s key in ExternalIDs %+v", - egressFirewallACLExtIdKey, acl.ExternalIDs) - continue - } - // egress firewall ACL.priority = types.EgressFirewallStartPriority - rule.idx => - // rule.idx = types.EgressFirewallStartPriority - ACL.priority - dbIDs := syncer.getEgressFirewallACLDbIDs(namespace, types.EgressFirewallStartPriority-acl.Priority) - acl.ExternalIDs = dbIDs.GetExternalIDs() - updatedACLs = append(updatedACLs, acl) - } - return updatedACLs -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync_test.go b/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync_test.go deleted file mode 100644 index 0d6b22bfcf..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync_test.go +++ /dev/null @@ -1,836 +0,0 @@ -package acl - -import ( - 
"encoding/json" - "fmt" - "strings" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - - corev1 "k8s.io/api/core/v1" - knet "k8s.io/api/networking/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" -) - -const ( - ingressDefaultDenySuffix = "ingressDefaultDeny" - egressDefaultDenySuffix = "egressDefaultDeny" - arpAllowPolicySuffix = "ARPallowPolicy" - arpAllowPolicyMatch = "(arp || nd)" -) - -type aclSync struct { - before *nbdb.ACL - after *libovsdbops.DbObjectIDs -} - -func testSyncerWithData(data []aclSync, controllerName string, initialDbState, finalDbState []libovsdbtest.TestData, - existingNodes []*corev1.Node) { - // create initial db setup - pgBefore := &nbdb.PortGroup{ - UUID: types.ClusterPortGroupNameBase, - } - dbSetup := libovsdbtest.TestSetup{NBData: append(initialDbState, pgBefore)} - for _, asSync := range data { - if asSync.after != nil { - asSync.before.UUID = asSync.after.String() + "-UUID" - } else { - asSync.before.UUID = asSync.before.Match - } - pgBefore.ACLs = append(pgBefore.ACLs, asSync.before.UUID) - dbSetup.NBData = append(dbSetup.NBData, asSync.before) - } - libovsdbOvnNBClient, _, libovsdbCleanup, err := libovsdbtest.NewNBSBTestHarness(dbSetup) - defer libovsdbCleanup.Cleanup() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create expected data using addressSetFactory - var expectedDbState []libovsdbtest.TestData - if finalDbState != nil { - expectedDbState = finalDbState - } else { - expectedDbState = initialDbState - } - pgAfter := &nbdb.PortGroup{ - UUID: types.ClusterPortGroupNameBase, - } - expectedDbState = append(expectedDbState, pgAfter) - for _, aclSync := range data { - if aclSync.after != nil { - acl := aclSync.before.DeepCopy() - acl.ExternalIDs = aclSync.after.GetExternalIDs() - acl.Tier = types.DefaultACLTier - pgAfter.ACLs = append(pgAfter.ACLs, acl.UUID) - expectedDbState = append(expectedDbState, acl) - } - } - // run sync - syncer := NewACLSyncer(libovsdbOvnNBClient, controllerName) - err = syncer.SyncACLs(existingNodes) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // check results - gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDbState)) -} - -func joinACLName(substrings ...string) string { - return strings.Join(substrings, "_") -} - -func buildPortGroup(hashName, name string, ports []*nbdb.LogicalSwitchPort, acls []*nbdb.ACL) *nbdb.PortGroup { - externalIds := map[string]string{"name": name} - pg := nbdb.PortGroup{ - Name: hashName, - ExternalIDs: externalIds, - } - - if len(acls) > 0 { - pg.ACLs = make([]string, 0, len(acls)) - for _, acl := range acls { - pg.ACLs = append(pg.ACLs, acl.UUID) - } - } - - if len(ports) > 0 { - pg.Ports = make([]string, 0, len(ports)) - for _, port := range ports { - pg.Ports = append(pg.Ports, port.UUID) - } - } - return &pg -} - -var _ = ginkgo.Describe("OVN ACL Syncer", func() { - const ( - controllerName = "fake-controller" - namespace1 = "namespace1" - ) - var syncerToBuildData = ACLSyncer{ - controllerName: controllerName, - } - - ginkgo.It("doesn't add 2 acls with the same PrimaryID", func() { - testData := []aclSync{ - 
{ - before: libovsdbops.BuildACL( - joinACLName(types.ClusterPortGroupNameBase, "DefaultDenyMulticastEgress"), - nbdb.ACLDirectionFromLport, - types.DefaultMcastDenyPriority, - "(ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - // syncer code is run before the net-pol handlers startup; thus realistically its tier0 at this point - // when we get add events for net-pol's later, this will get updated to tier2 if needed - // this is why we use the placeholder tier ACL for the tests in this file and in address_set_sync_test - // instead of the default tier ACL - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultMcastACLDbIDs(mcastDefaultDenyID, "Egress"), - }, - { - before: libovsdbops.BuildACL( - joinACLName(types.ClusterPortGroupNameBase, "DefaultDenyMulticastEgress"), - nbdb.ACLDirectionFromLport, - types.DefaultMcastDenyPriority, - "(ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - types.PrimaryACLTier, - ), - }, - } - testSyncerWithData(testData, controllerName, []libovsdbtest.TestData{}, nil, nil) - }) - ginkgo.It("updates multicast acls", func() { - testData := []aclSync{ - // defaultDenyEgressACL - { - before: libovsdbops.BuildACL( - joinACLName(types.ClusterPortGroupNameBase, "DefaultDenyMulticastEgress"), - nbdb.ACLDirectionFromLport, - types.DefaultMcastDenyPriority, - "(ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultMcastACLDbIDs(mcastDefaultDenyID, "Egress"), - }, - // defaultDenyIngressACL - { - before: libovsdbops.BuildACL( - joinACLName(types.ClusterPortGroupNameBase, "DefaultDenyMulticastIngress"), - nbdb.ACLDirectionToLport, - types.DefaultMcastDenyPriority, - "(ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Ingress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultMcastACLDbIDs(mcastDefaultDenyID, "Ingress"), - }, - // defaultAllowEgressACL - { - before: libovsdbops.BuildACL( - joinACLName(types.ClusterRtrPortGroupNameBase, "DefaultAllowMulticastEgress"), - nbdb.ACLDirectionFromLport, - types.DefaultMcastAllowPriority, - "inport == @clusterRtrPortGroup && (ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultMcastACLDbIDs(mcastAllowInterNodeID, "Egress"), - }, - // defaultAllowIngressACL - { - before: libovsdbops.BuildACL( - joinACLName(types.ClusterRtrPortGroupNameBase, "DefaultAllowMulticastIngress"), - nbdb.ACLDirectionToLport, - types.DefaultMcastAllowPriority, - "outport == @clusterRtrPortGroup && (ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - 
false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Ingress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultMcastACLDbIDs(mcastAllowInterNodeID, "Ingress"), - }, - // nsAllowEgressACL - { - before: libovsdbops.BuildACL( - joinACLName(namespace1, "MulticastAllowEgress"), - nbdb.ACLDirectionFromLport, - types.DefaultMcastAllowPriority, - "inport == @a16982411286042166782 && ip4.mcast", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNamespaceMcastACLDbIDs(namespace1, "Egress"), - }, - // nsAllowIngressACL - { - before: libovsdbops.BuildACL( - joinACLName(namespace1, "MulticastAllowIngress"), - nbdb.ACLDirectionToLport, - types.DefaultMcastAllowPriority, - "outport == @a16982411286042166782 && (igmp || (ip4.src == $a4322231855293774466 && ip4.mcast))", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Ingress", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNamespaceMcastACLDbIDs(namespace1, "Ingress"), - }, - } - testSyncerWithData(testData, controllerName, []libovsdbtest.TestData{}, nil, nil) - }) - ginkgo.It("updates allow from node acls", func() { - nodeName := "node1" - ipv4MgmtIP := "10.244.0.2" - ipv6MgmtIP := "fd02:0:0:2::2" - - testData := []aclSync{ - // ipv4 acl - { - before: libovsdbops.BuildACL( - "", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip4.src=="+ipv4MgmtIP, - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - nil, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getAllowFromNodeACLDbIDs(nodeName, ipv4MgmtIP), - }, - // ipv6 acl - { - before: libovsdbops.BuildACL( - "", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip6.src=="+ipv6MgmtIP, - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - nil, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getAllowFromNodeACLDbIDs(nodeName, ipv6MgmtIP), - }, - } - hostSubnets := map[string][]string{types.DefaultNetworkName: {"10.244.0.0/24", "fd02:0:0:2::2895/64"}} - bytes, err := json.Marshal(hostSubnets) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - existingNodes := []*corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{"k8s.ovn.org/node-subnets": string(bytes)}, - }, - }, - } - testSyncerWithData(testData, controllerName, []libovsdbtest.TestData{}, nil, existingNodes) - }) - ginkgo.It("updates gress policy acls", func() { - policyNamespace := "policyNamespace" - policyName := "policyName" - testData := []aclSync{ - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+policyName+"_0", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip4.dst == 10.244.1.5/32 && inport == @a2653181086423119552", - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - l4MatchACLExtIdKey: "tcp && 6380<=tcp.dst<=7000", - ipBlockCIDRACLExtIdKey: "true", - namespaceACLExtIdKey: policyNamespace, - policyACLExtIdKey: policyName, - policyTypeACLExtIdKey: string(knet.PolicyTypeEgress), - fmt.Sprintf(policyTypeNumACLExtIdKey, knet.PolicyTypeEgress): "0", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNetpolGressACLDbIDs(policyNamespace, policyName, string(knet.PolicyTypeEgress), - 0, 0, 0), - }, - { - before: libovsdbops.BuildACL( - 
policyNamespace+"_"+policyName+"_0", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip4.dst == 10.244.1.5/32 && inport == @a2653181086423119552", - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - l4MatchACLExtIdKey: "tcp && 6380<=tcp.dst<=7000", - ipBlockCIDRACLExtIdKey: "2", - namespaceACLExtIdKey: policyNamespace, - policyACLExtIdKey: policyName, - policyTypeACLExtIdKey: string(knet.PolicyTypeEgress), - fmt.Sprintf(policyTypeNumACLExtIdKey, knet.PolicyTypeEgress): "0", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNetpolGressACLDbIDs(policyNamespace, policyName, string(knet.PolicyTypeEgress), - 0, 0, 1), - }, - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+policyName+"_0", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip4.dst == 10.244.1.5/32 && inport == @a2653181086423119552", - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - l4MatchACLExtIdKey: "tcp && 1<=tcp.dst<=3", - ipBlockCIDRACLExtIdKey: "true", - namespaceACLExtIdKey: policyNamespace, - policyACLExtIdKey: policyName, - policyTypeACLExtIdKey: string(knet.PolicyTypeEgress), - fmt.Sprintf(policyTypeNumACLExtIdKey, knet.PolicyTypeEgress): "0", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNetpolGressACLDbIDs(policyNamespace, policyName, string(knet.PolicyTypeEgress), - 0, 1, 0), - }, - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+policyName+"_0", - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "ip4.dst == 10.244.1.5/32 && inport == @a2653181086423119552", - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - l4MatchACLExtIdKey: "tcp && 1<=tcp.dst<=3", - ipBlockCIDRACLExtIdKey: "2", - namespaceACLExtIdKey: policyNamespace, - policyACLExtIdKey: policyName, - policyTypeACLExtIdKey: string(knet.PolicyTypeEgress), - fmt.Sprintf(policyTypeNumACLExtIdKey, knet.PolicyTypeEgress): "0", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNetpolGressACLDbIDs(policyNamespace, policyName, string(knet.PolicyTypeEgress), - 0, 1, 1), - }, - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+policyName+"_0", - nbdb.ACLDirectionFromLport, - types.DefaultAllowPriority, - "(ip4.src == {$a3733136965153973077} || (ip4.src == 169.254.169.5 && ip4.dst == {$a3733136965153973077})) && outport == @a2653181086423119552", - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - l4MatchACLExtIdKey: libovsdbutil.UnspecifiedL4Match, - ipBlockCIDRACLExtIdKey: "false", - namespaceACLExtIdKey: policyNamespace, - policyACLExtIdKey: policyName, - policyTypeACLExtIdKey: string(knet.PolicyTypeIngress), - fmt.Sprintf(policyTypeNumACLExtIdKey, knet.PolicyTypeIngress): "0", - }, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getNetpolGressACLDbIDs(policyNamespace, policyName, string(knet.PolicyTypeIngress), - 0, emptyIdx, emptyIdx), - }, - } - testSyncerWithData(testData, controllerName, []libovsdbtest.TestData{}, nil, nil) - }) - ginkgo.It("updates default deny policy acls", func() { - policyNamespace := "policyNamespace" - - defaultDenyPortGroupName := func(namespace, gressSuffix string) string { - return joinACLName(util.HashForOVN(namespace), gressSuffix) - } - getStaleARPAllowACLName := func(ns string) string { - return joinACLName(ns, arpAllowPolicySuffix) - } - egressPGName := defaultDenyPortGroupName(policyNamespace, egressDefaultDenySuffix) - 
ingressPGName := defaultDenyPortGroupName(policyNamespace, ingressDefaultDenySuffix) - staleARPEgressACL := libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionFromLport, - types.DefaultAllowPriority, - "inport == @"+egressPGName+" && "+staleArpAllowPolicyMatch, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.DefaultACLTier, - ) - staleARPEgressACL.UUID = "staleARPEgressACL-UUID" - egressDenyPG := buildPortGroup( - egressPGName, - egressPGName, - nil, - []*nbdb.ACL{staleARPEgressACL}, - ) - egressDenyPG.UUID = egressDenyPG.Name + "-UUID" - - staleARPIngressACL := libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "outport == @"+ingressPGName+" && "+staleArpAllowPolicyMatch, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.DefaultACLTier, - ) - staleARPIngressACL.UUID = "staleARPIngressACL-UUID" - ingressDenyPG := buildPortGroup( - ingressPGName, - ingressPGName, - nil, - []*nbdb.ACL{staleARPIngressACL}, - ) - ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - initialDb := []libovsdbtest.TestData{staleARPEgressACL, egressDenyPG, staleARPIngressACL, ingressDenyPG} - finalEgressDenyPG := buildPortGroup( - egressPGName, - egressPGName, - nil, - nil, - ) - finalEgressDenyPG.UUID = finalEgressDenyPG.Name + "-UUID" - finalIngressDenyPG := buildPortGroup( - ingressPGName, - ingressPGName, - nil, - nil, - ) - finalIngressDenyPG.UUID = finalIngressDenyPG.Name + "-UUID" - finalDb := []libovsdbtest.TestData{finalEgressDenyPG, finalIngressDenyPG} - - testData := []aclSync{ - // egress deny - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+egressDefaultDenySuffix, - nbdb.ACLDirectionFromLport, - types.DefaultDenyPriority, - "inport == @"+egressPGName, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeEgress), defaultDenyACL), - }, - // egress allow ARP - { - before: libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionFromLport, - types.DefaultAllowPriority, - "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeEgress), arpAllowACL), - }, - // ingress deny - { - before: libovsdbops.BuildACL( - policyNamespace+"_"+ingressDefaultDenySuffix, - nbdb.ACLDirectionToLport, - types.DefaultDenyPriority, - "outport == @"+ingressPGName, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeIngress), defaultDenyACL), - }, - // ingress allow ARP - { - before: libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "outport == 
@"+ingressPGName+" && "+arpAllowPolicyMatch, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeIngress), arpAllowACL), - }, - } - testSyncerWithData(testData, controllerName, initialDb, finalDb, nil) - }) - ginkgo.It("updates default deny policy acl with long names", func() { - policyNamespace := "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk" // longest allowed namespace name - defaultDenyPortGroupName := func(namespace, gressSuffix string) string { - return joinACLName(util.HashForOVN(namespace), gressSuffix) - } - getStaleARPAllowACLName := func(ns string) string { - return joinACLName(ns, arpAllowPolicySuffix) - } - egressPGName := defaultDenyPortGroupName(policyNamespace, egressDefaultDenySuffix) - ingressPGName := defaultDenyPortGroupName(policyNamespace, ingressDefaultDenySuffix) - staleARPEgressACL := libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionFromLport, - types.DefaultAllowPriority, - "inport == @"+egressPGName+" && "+staleArpAllowPolicyMatch, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.DefaultACLTier, - ) - staleARPEgressACL.UUID = "staleARPEgressACL-UUID" - egressDenyPG := buildPortGroup( - egressPGName, - egressPGName, - nil, - []*nbdb.ACL{staleARPEgressACL}, - ) - egressDenyPG.UUID = egressDenyPG.Name + "-UUID" - - staleARPIngressACL := libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "outport == @"+ingressPGName+" && "+staleArpAllowPolicyMatch, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.DefaultACLTier, - ) - staleARPIngressACL.UUID = "staleARPIngressACL-UUID" - ingressDenyPG := buildPortGroup( - ingressPGName, - ingressPGName, - nil, - []*nbdb.ACL{staleARPIngressACL}, - ) - ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - initialDb := []libovsdbtest.TestData{staleARPEgressACL, egressDenyPG, staleARPIngressACL, ingressDenyPG} - finalEgressDenyPG := buildPortGroup( - egressPGName, - egressPGName, - nil, - nil, - ) - finalEgressDenyPG.UUID = finalEgressDenyPG.Name + "-UUID" - finalIngressDenyPG := buildPortGroup( - ingressPGName, - ingressPGName, - nil, - nil, - ) - finalIngressDenyPG.UUID = finalIngressDenyPG.Name + "-UUID" - finalDb := []libovsdbtest.TestData{finalEgressDenyPG, finalIngressDenyPG} - - testData := []aclSync{ - // egress deny - { - before: libovsdbops.BuildACL( - policyNamespace, - nbdb.ACLDirectionFromLport, - types.DefaultDenyPriority, - "inport == @"+egressPGName, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeEgress), defaultDenyACL), - }, - // egress allow ARP - { - before: libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionFromLport, - types.DefaultAllowPriority, - "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - 
map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeEgress), arpAllowACL), - }, - // egress deny - { - before: libovsdbops.BuildACL( - policyNamespace, - nbdb.ACLDirectionToLport, - types.DefaultDenyPriority, - "outport == @"+ingressPGName, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeIngress), defaultDenyACL), - }, - // egress allow ARP - { - before: libovsdbops.BuildACL( - getStaleARPAllowACLName(policyNamespace), - nbdb.ACLDirectionToLport, - types.DefaultAllowPriority, - "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress)}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getDefaultDenyPolicyACLIDs(policyNamespace, string(knet.PolicyTypeIngress), arpAllowACL), - }, - } - testSyncerWithData(testData, controllerName, initialDb, finalDb, nil) - }) - ginkgo.It("updates egress firewall acls", func() { - testData := []aclSync{ - { - before: libovsdbops.BuildACL( - "random", - nbdb.ACLDirectionFromLport, - types.EgressFirewallStartPriority, - "any", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{egressFirewallACLExtIdKey: namespace1}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getEgressFirewallACLDbIDs(namespace1, 0), - }, - { - before: libovsdbops.BuildACL( - "random2", - nbdb.ACLDirectionFromLport, - types.EgressFirewallStartPriority-1, - "any2", - nbdb.ACLActionDrop, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{egressFirewallACLExtIdKey: namespace1}, - nil, - types.PrimaryACLTier, - ), - after: syncerToBuildData.getEgressFirewallACLDbIDs(namespace1, 1), - }, - } - testSyncerWithData(testData, controllerName, []libovsdbtest.TestData{}, nil, nil) - }) - ginkgo.It("deletes leftover multicast acls", func() { - egressACL := libovsdbops.BuildACL( - "", - nbdb.ACLDirectionFromLport, - types.DefaultRoutedMcastAllowPriority, - "inport == @"+types.ClusterRtrPortGroupNameBase+" && (ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Egress", - }, - nil, - types.PrimaryACLTier, - ) - egressACL.UUID = "egress-multicast-UUID" - ingressACL := libovsdbops.BuildACL( - joinACLName(namespace1, "MulticastAllowIngress"), - nbdb.ACLDirectionToLport, - types.DefaultRoutedMcastAllowPriority, - "outport == @"+types.ClusterRtrPortGroupNameBase+" && (ip4.mcast || mldv1 || mldv2 || (ip6.dst[120..127] == 0xff && ip6.dst[116] == 1))", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{ - defaultDenyPolicyTypeACLExtIdKey: "Ingress", - }, - nil, - types.PrimaryACLTier, - ) - ingressACL.UUID = "ingress-multicast-UUID" - - clusterRtrPortGroup := buildPortGroup( - types.ClusterRtrPortGroupNameBase, - types.ClusterRtrPortGroupNameBase, - nil, - []*nbdb.ACL{egressACL, ingressACL}, - ) - clusterRtrPortGroup.UUID = clusterRtrPortGroup.Name + "-UUID" - initialDb := []libovsdbtest.TestData{clusterRtrPortGroup, 
egressACL, ingressACL} - finalClusterRtrPortGroup := buildPortGroup( - types.ClusterRtrPortGroupNameBase, - types.ClusterRtrPortGroupNameBase, - nil, - nil, - ) - finalClusterRtrPortGroup.UUID = finalClusterRtrPortGroup.Name + "-UUID" - finalDb := []libovsdbtest.TestData{finalClusterRtrPortGroup} - testSyncerWithData([]aclSync{}, controllerName, initialDb, finalDb, nil) - }) -}) diff --git a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_suite_test.go b/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_suite_test.go deleted file mode 100644 index 2709d429fa..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_suite_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package address_set_test - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -func TestAddressSet(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "AddressSet Suite") -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync.go b/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync.go deleted file mode 100644 index fa59997948..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync.go +++ /dev/null @@ -1,399 +0,0 @@ -package address_set - -import ( - "fmt" - "strconv" - "strings" - - "k8s.io/klog/v2" - - libovsdbclient "github.com/ovn-org/libovsdb/client" - "github.com/ovn-org/libovsdb/ovsdb" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/batching" -) - -const ( - // legacy constants for address set names - legacyIPv4AddressSetSuffix = "_v4" - legacyIPv6AddressSetSuffix = "_v6" - - // OVN-K8S legacy Address Sets Names - hybridRoutePolicyPrefix = "hybrid-route-pods-" - egressQoSRulePrefix = "egress-qos-pods-" - rulePriorityDelimeter = "-" - clusterNodeIP = "cluster-node-ips" - egressIPServedPods = "egressip-served-pods" - egressServiceServedPods = "egresssvc-served-pods" - - // constants that are still used by the handlers. - // They are copied here to make sure address set sync has a pre-defined format of objects. - // If some owner needs to change the ids it is using at some point, it should run after AddressSetsSyncer, - // and update object based on the syncer format. - egressFirewallACLExtIdKey = "egressFirewall" - egressServiceServedPodsAddrSetName = "egresssvc-served-pods" - nodeIPAddrSetName = "node-ips" - egressIPServedPodsAddrSetName = "egressip-served-pods" - ipv4AddressSetFactoryID = "v4" - ipv6AddressSetFactoryID = "v6" -) - -func truncateSuffixFromAddressSet(asName string) (string, string) { - // Legacy address set names will not have v4 or v6 suffixes. 
- // truncate them for the new ones - if strings.HasSuffix(asName, legacyIPv4AddressSetSuffix) { - return strings.TrimSuffix(asName, legacyIPv4AddressSetSuffix), legacyIPv4AddressSetSuffix - } - if strings.HasSuffix(asName, legacyIPv6AddressSetSuffix) { - return strings.TrimSuffix(asName, legacyIPv6AddressSetSuffix), legacyIPv6AddressSetSuffix - } - return asName, "" -} - -type updateAddrSetInfo struct { - acls []*nbdb.ACL - qoses []*nbdb.QoS - lrps []*nbdb.LogicalRouterPolicy - oldAddrSet *nbdb.AddressSet - newAddrSet *nbdb.AddressSet -} - -type AddressSetsSyncer struct { - nbClient libovsdbclient.Client - controllerName string - // txnBatchSize is used to control how many address sets will be updated with 1 db transaction. - txnBatchSize int - ignoredAddressSets int -} - -// controllerName is the name of the new controller that should own all address sets without controller -func NewAddressSetSyncer(nbClient libovsdbclient.Client, controllerName string) *AddressSetsSyncer { - return &AddressSetsSyncer{ - nbClient: nbClient, - controllerName: controllerName, - txnBatchSize: 50, - } -} - -// return if address set is owned by network policy and its namespace, name, internal id -func checkIfNetpol(asName string) (netpolOwned bool, namespace, name, direction, idx string) { - // old format fmt.Sprintf("%s.%s.%s.%d", gp.policyNamespace, gp.policyName, direction, gp.idx) - // namespace doesn't have dots - s := strings.Split(asName, ".") - sLen := len(s) - // index should be a number - _, numErr := strconv.Atoi(s[sLen-1]) - if sLen >= 4 && (s[sLen-2] == "ingress" || s[sLen-2] == "egress") && numErr == nil { - // address set is owned by network policy - netpolOwned = true - // namespace doesn't have dots - namespace = s[0] - // policyName may have dots, join in that case - name = strings.Join(s[1:sLen-2], ".") - direction = s[sLen-2] - idx = s[sLen-1] - } - return -} - -func (syncer *AddressSetsSyncer) getEgressIPAddrSetDbIDs(name, network string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetEgressIP, syncer.controllerName, map[libovsdbops.ExternalIDKey]string{ - // egress ip creates cluster-wide address sets with egressIpAddrSetName - libovsdbops.ObjectNameKey: name, - libovsdbops.NetworkKey: network, - }) -} - -func (syncer *AddressSetsSyncer) getEgressServiceAddrSetDbIDs() *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetEgressService, syncer.controllerName, map[libovsdbops.ExternalIDKey]string{ - // egressService has 1 cluster-wide address set - libovsdbops.ObjectNameKey: egressServiceServedPodsAddrSetName, - }) -} - -func (syncer *AddressSetsSyncer) getHybridRouteAddrSetDbIDs(nodeName string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - // there is only 1 address set of this type per node - libovsdbops.ObjectNameKey: nodeName, - }) -} - -func (syncer *AddressSetsSyncer) getEgressQosAddrSetDbIDs(namespace, priority string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetEgressQoS, syncer.controllerName, map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: namespace, - // priority is the unique id for address set within given namespace - libovsdbops.PriorityKey: priority, - }) -} - -func (syncer *AddressSetsSyncer) getNetpolAddrSetDbIDs(policyNamespace, policyName, direction, idx string) *libovsdbops.DbObjectIDs { - return 
libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNetworkPolicy, syncer.controllerName, map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: policyNamespace + "_" + policyName, - // direction and idx uniquely identify address set (= gress policy rule) - libovsdbops.PolicyDirectionKey: direction, - libovsdbops.GressIdxKey: idx, - }) -} - -func (syncer *AddressSetsSyncer) getEgressFirewallDNSAddrSetDbIDs(dnsName string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetEgressFirewallDNS, syncer.controllerName, - map[libovsdbops.ExternalIDKey]string{ - // dns address sets are cluster-wide objects, they have unique names - libovsdbops.ObjectNameKey: dnsName, - }) -} - -func (syncer *AddressSetsSyncer) getNamespaceAddrSetDbIDs(namespaceName string) *libovsdbops.DbObjectIDs { - return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetNamespace, syncer.controllerName, map[libovsdbops.ExternalIDKey]string{ - // namespace has only 1 address set, no additional ids are required - libovsdbops.ObjectNameKey: namespaceName, - }) -} - -func buildNewAddressSet(dbIDs *libovsdbops.DbObjectIDs, ipFamily string) *nbdb.AddressSet { - dbIDsWithIPFam := dbIDs.AddIDs(map[libovsdbops.ExternalIDKey]string{libovsdbops.IPFamilyKey: ipFamily}) - externalIDs := dbIDsWithIPFam.GetExternalIDs() - name := externalIDs[libovsdbops.PrimaryIDKey.String()] - as := &nbdb.AddressSet{ - Name: util.HashForOVN(name), - ExternalIDs: externalIDs, - } - return as -} - -// getReferencingObjsAndNewDbIDs finds all object that reference stale address set and tries to create a new dbIDs -// based on referencing objects -func (syncer *AddressSetsSyncer) getReferencingObjsAndNewDbIDs(oldHash, oldName string) (acls []*nbdb.ACL, - qoses []*nbdb.QoS, lrps []*nbdb.LogicalRouterPolicy, dbIDs *libovsdbops.DbObjectIDs, err error) { - // find all referencing objects - aclPred := func(acl *nbdb.ACL) bool { - return strings.Contains(acl.Match, "$"+oldHash) - } - acls, err = libovsdbops.FindACLsWithPredicate(syncer.nbClient, aclPred) - if err != nil { - err = fmt.Errorf("failed to find acls for address set %s: %v", oldHash, err) - return - } - qosPred := func(qos *nbdb.QoS) bool { - return strings.Contains(qos.Match, "$"+oldHash) - } - qoses, err = libovsdbops.FindQoSesWithPredicate(syncer.nbClient, qosPred) - if err != nil { - err = fmt.Errorf("failed to find qoses for address set %s: %v", oldHash, err) - return - } - lrpPred := func(lrp *nbdb.LogicalRouterPolicy) bool { - return strings.Contains(lrp.Match, "$"+oldHash) - } - lrps, err = libovsdbops.FindLogicalRouterPoliciesWithPredicate(syncer.nbClient, lrpPred) - if err != nil { - err = fmt.Errorf("failed to find lrps for address set %s: %v", oldHash, err) - return - } - // build dbIDs - switch { - // Filter address sets with pre-defined names - case oldName == clusterNodeIP: - dbIDs = syncer.getEgressIPAddrSetDbIDs(nodeIPAddrSetName, "default") - case oldName == egressIPServedPods: - dbIDs = syncer.getEgressIPAddrSetDbIDs(egressIPServedPodsAddrSetName, "default") - case oldName == egressServiceServedPods: - dbIDs = syncer.getEgressServiceAddrSetDbIDs() - // HybridNodeRoute and EgressQoS address sets have specific prefixes - // Try to parse dbIDs from address set name - case strings.HasPrefix(oldName, hybridRoutePolicyPrefix): - // old name has format types.hybridRoutePolicyPrefix + node - nodeName := oldName[len(hybridRoutePolicyPrefix):] - dbIDs = syncer.getHybridRouteAddrSetDbIDs(nodeName) - case strings.HasPrefix(oldName, egressQoSRulePrefix): - // 
oldName has format fmt.Sprintf("%s%s%s%d", egressQoSRulePrefix, namespace, rulePriorityDelimeter, priority) - // we extract the namespace from the id by removing the prefix and the priority suffix - // egress-qos-pods-my-namespace-123 -> my-namespace - namespaceWithPrio := oldName[len(egressQoSRulePrefix):] - // namespaceWithPrio = my-namespace-123 - delIndex := strings.LastIndex(namespaceWithPrio, rulePriorityDelimeter) - ns := namespaceWithPrio[:delIndex] - priority := namespaceWithPrio[delIndex+1:] - dbIDs = syncer.getEgressQosAddrSetDbIDs(ns, priority) - default: - // netpol address set has a specific name format ... - netpolOwned, namespace, name, direction, idx := checkIfNetpol(oldName) - if netpolOwned { - dbIDs = syncer.getNetpolAddrSetDbIDs(namespace, name, direction, idx) - } else { - // we have only egress firewall dns and namespace address sets left - // try to distinguish them by referencing acls - if len(acls) > 0 { - // if given address set is owned by egress firewall, all ACLs will be owned by the same object - acl := acls[0] - // check if egress firewall dns is the owner - // the only address set that may be referenced in egress firewall destination is dns address set - if acl.ExternalIDs[egressFirewallACLExtIdKey] != "" && strings.Contains(acl.Match, ".dst == $"+oldHash) { - dbIDs = syncer.getEgressFirewallDNSAddrSetDbIDs(oldName) - } - } - if dbIDs == nil { - // we failed to find the owner, assume everything else to be owned by namespace, - // since it doesn't have any specific-linked objects, - // oldName is just namespace name - dbIDs = syncer.getNamespaceAddrSetDbIDs(oldName) - } - } - } - // dbIDs is set - return -} - -func (syncer *AddressSetsSyncer) getUpdateAddrSetOps(addrSetsInfo []*updateAddrSetInfo) (ops []ovsdb.Operation, err error) { - // one referencing object may contain multiple references that need to be updated - // these maps are used to track referenced that need to be replaced for every object type - aclsToUpdate := map[string]*nbdb.ACL{} - qosesToUpdate := map[string]*nbdb.QoS{} - lrpsToUpdate := map[string]*nbdb.LogicalRouterPolicy{} - - for _, addrSetInfo := range addrSetsInfo { - if addrSetInfo.newAddrSet == nil { - // new address set wasn't built - if len(addrSetInfo.acls) == 0 && len(addrSetInfo.qoses) == 0 && len(addrSetInfo.lrps) == 0 { - // address set is stale and not referenced, clean up - ops, err = libovsdbops.DeleteAddressSetsOps(syncer.nbClient, ops, addrSetInfo.oldAddrSet) - } else { - syncer.ignoredAddressSets += 1 - } - continue - } - - oldName := addrSetInfo.oldAddrSet.ExternalIDs["name"] - // create updated address set - ops, err = libovsdbops.CreateOrUpdateAddressSetsOps(syncer.nbClient, ops, addrSetInfo.newAddrSet) - if err != nil { - return nil, fmt.Errorf("failed to get update address set ops for address set %s: %v", oldName, err) - } - // delete old address set - ops, err = libovsdbops.DeleteAddressSetsOps(syncer.nbClient, ops, addrSetInfo.oldAddrSet) - if err != nil { - return nil, fmt.Errorf("failed to get update address set ops for address set %s: %v", oldName, err) - } - oldHash := "$" + addrSetInfo.oldAddrSet.Name - newHash := "$" + addrSetInfo.newAddrSet.Name - - for _, acl := range addrSetInfo.acls { - if _, ok := aclsToUpdate[acl.UUID]; !ok { - aclsToUpdate[acl.UUID] = acl - } - aclsToUpdate[acl.UUID].Match = strings.ReplaceAll(aclsToUpdate[acl.UUID].Match, oldHash, newHash) - } - - for _, qos := range addrSetInfo.qoses { - if _, ok := qosesToUpdate[qos.UUID]; !ok { - qosesToUpdate[qos.UUID] = qos - } - 
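
The naming rules applied above are the whole contract of this migration: a legacy network-policy address set is named "<namespace>.<policyName>.<direction>.<idx>" (only the policy name may contain dots), and a legacy egress-QoS set is named "egress-qos-pods-<namespace>-<priority>". A self-contained sketch of that classification, using only the standard library (the function and sample names are illustrative, not taken from the removed file):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

const egressQoSPrefix = "egress-qos-pods-"

// classifyLegacyASName mirrors the name-based rules used by the deleted
// syncer: netpol sets look like "<namespace>.<policyName>.<direction>.<idx>"
// (the namespace never contains dots, the policy name may), and egress-QoS
// sets look like "egress-qos-pods-<namespace>-<priority>".
func classifyLegacyASName(name string) string {
	if strings.HasPrefix(name, egressQoSPrefix) {
		rest := strings.TrimPrefix(name, egressQoSPrefix)
		if i := strings.LastIndex(rest, "-"); i > 0 {
			return fmt.Sprintf("egress-qos: namespace=%q priority=%q", rest[:i], rest[i+1:])
		}
	}
	s := strings.Split(name, ".")
	n := len(s)
	if n >= 4 && (s[n-2] == "ingress" || s[n-2] == "egress") {
		if _, err := strconv.Atoi(s[n-1]); err == nil {
			return fmt.Sprintf("netpol: namespace=%q policy=%q direction=%q idx=%q",
				s[0], strings.Join(s[1:n-2], "."), s[n-2], s[n-1])
		}
	}
	return "unclassified (namespace, DNS, or other owner)"
}

func main() {
	for _, name := range []string{
		"team-a.allow.db.traffic.egress.0", // policy name itself contains dots
		"egress-qos-pods-my-namespace-123",
		"some-namespace",
	} {
		fmt.Printf("%-36s -> %s\n", name, classifyLegacyASName(name))
	}
}
```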
qosesToUpdate[qos.UUID].Match = strings.ReplaceAll(qosesToUpdate[qos.UUID].Match, oldHash, newHash) - } - - for _, lrp := range addrSetInfo.lrps { - if _, ok := lrpsToUpdate[lrp.UUID]; !ok { - lrpsToUpdate[lrp.UUID] = lrp - } - lrpsToUpdate[lrp.UUID].Match = strings.ReplaceAll(lrpsToUpdate[lrp.UUID].Match, oldHash, newHash) - } - } - - for _, acl := range aclsToUpdate { - ops, err = libovsdbops.UpdateACLsOps(syncer.nbClient, ops, acl) - if err != nil { - return nil, fmt.Errorf("failed to get update acl ops: %v", err) - } - } - for _, qos := range qosesToUpdate { - ops, err = libovsdbops.UpdateQoSesOps(syncer.nbClient, ops, qos) - if err != nil { - return nil, fmt.Errorf("failed to get update qos ops: %v", err) - } - } - for _, lrp := range lrpsToUpdate { - ops, err = libovsdbops.UpdateLogicalRouterPoliciesOps(syncer.nbClient, ops, lrp) - if err != nil { - return nil, fmt.Errorf("failed to get update LRPs ops: %v", err) - } - } - return -} - -// getAddrSetUpdateInfo adds db ops to update address set and objects that reference it -func (syncer *AddressSetsSyncer) getAddrSetUpdateInfo(as *nbdb.AddressSet) (*updateAddrSetInfo, error) { - oldName, ipSuffix := truncateSuffixFromAddressSet(as.ExternalIDs["name"]) - // oldName may be empty if address set doesn't have ExternalID set - acls, qoses, lrps, dbIDs, err := syncer.getReferencingObjsAndNewDbIDs(as.Name, oldName) - if err != nil { - return nil, fmt.Errorf("failed to get new dbIDs for address set %s: %v", oldName, err) - } - updateAddrSet := &updateAddrSetInfo{acls, qoses, lrps, as, nil} - - if oldName == "" { - klog.Infof("external_ids->name missing, stale address set %s", as.Name) - return updateAddrSet, nil - } - if ipSuffix == "" { - klog.Infof("Found stale address set %s without ip family suffix and empty ips list", oldName) - return updateAddrSet, nil - } - var nbdbAS *nbdb.AddressSet - if ipSuffix == legacyIPv4AddressSetSuffix { - nbdbAS = buildNewAddressSet(dbIDs, ipv4AddressSetFactoryID) - } else { - nbdbAS = buildNewAddressSet(dbIDs, ipv6AddressSetFactoryID) - } - // since we need to update addressSet.Name, which is an index and not listed in getNonZeroAddressSetMutableFields, - // we copy existing addressSet, update it address set needed, and replace (delete and create) existing address set with the updated - newAS := as.DeepCopy() - // reset UUID - newAS.UUID = "" - // update address set Name - newAS.Name = nbdbAS.Name - // remove old "name" ExternalID - delete(newAS.ExternalIDs, "name") - // Insert new externalIDs generated by address set factory - for key, value := range nbdbAS.ExternalIDs { - newAS.ExternalIDs[key] = value - } - updateAddrSet.newAddrSet = newAS - return updateAddrSet, nil -} - -func (syncer *AddressSetsSyncer) SyncAddressSets() error { - // stale address sets don't have controller ID - p := libovsdbops.GetNoOwnerPredicate[*nbdb.AddressSet]() - addrSetList, err := libovsdbops.FindAddressSetsWithPredicate(syncer.nbClient, p) - if err != nil { - return fmt.Errorf("failed to find stale address sets: %v", err) - } - - err = batching.Batch[*nbdb.AddressSet](syncer.txnBatchSize, addrSetList, func(batchAddrSets []*nbdb.AddressSet) error { - addrSetInfos := []*updateAddrSetInfo{} - for _, addrSet := range batchAddrSets { - updateInfo, err := syncer.getAddrSetUpdateInfo(addrSet) - if err != nil { - return err - } - addrSetInfos = append(addrSetInfos, updateInfo) - } - // generate update ops - ops, err := syncer.getUpdateAddrSetOps(addrSetInfos) - if err != nil { - return fmt.Errorf("failed to get update address sets 
ops: %w", err) - } - _, err = libovsdbops.TransactAndCheck(syncer.nbClient, ops) - if err != nil { - return fmt.Errorf("failed to transact address set sync ops: %v", err) - } - return nil - }) - klog.Infof("SyncAddressSets found %d stale address sets, %d of them were ignored", - len(addrSetList), syncer.ignoredAddressSets) - return err -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync_test.go b/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync_test.go deleted file mode 100644 index cbec2a22a3..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/address_set/address_set_sync_test.go +++ /dev/null @@ -1,723 +0,0 @@ -package address_set - -import ( - "fmt" - "strings" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" -) - -type asSync struct { - before *nbdb.AddressSet - after *libovsdbops.DbObjectIDs - afterTweak func(*nbdb.AddressSet) - addressSetFactoryIPID string - remove bool - leave bool -} - -// data is used to pass address set for initial and expected db state, initialDbState may be used to add objects -// of other types to the initial db state, and finalDbState may be used to set the expected state of objects -// passed in initialDbState. If finalDbState is nil, final state will be updated automatically by changing address set -// references for initial objects from initialDbState. -func testSyncerWithData(data []asSync, initialDbState, finalDbState []libovsdbtest.TestData, controllerName string) { - // create initial db setup - dbSetup := libovsdbtest.TestSetup{NBData: initialDbState} - for _, asSync := range data { - dbSetup.NBData = append(dbSetup.NBData, asSync.before) - } - libovsdbOvnNBClient, _, libovsdbCleanup, err := libovsdbtest.NewNBSBTestHarness(dbSetup) - defer libovsdbCleanup.Cleanup() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create expected data using addressSetFactory - expectedDbState := initialDbState - if finalDbState != nil { - expectedDbState = finalDbState - } - for _, asSync := range data { - if asSync.remove { - continue - } - if asSync.leave { - expectedDbState = append(expectedDbState, asSync.before) - } else if asSync.after != nil { - updatedAS := getUpdatedAS(asSync.before, asSync.after, asSync.addressSetFactoryIPID) - if asSync.afterTweak != nil { - asSync.afterTweak(updatedAS) - } - expectedDbState = append(expectedDbState, updatedAS) - if finalDbState == nil { - for _, dbObj := range expectedDbState { - if lrp, ok := dbObj.(*nbdb.LogicalRouterPolicy); ok { - lrp.Match = strings.ReplaceAll(lrp.Match, "$"+asSync.before.Name, "$"+updatedAS.Name) - } - if acl, ok := dbObj.(*nbdb.ACL); ok { - acl.Match = strings.ReplaceAll(acl.Match, "$"+asSync.before.Name, "$"+updatedAS.Name) - } - if qos, ok := dbObj.(*nbdb.QoS); ok { - qos.Match = strings.ReplaceAll(qos.Match, "$"+asSync.before.Name, "$"+updatedAS.Name) - } - } - } - } - } - // run sync - syncer := NewAddressSetSyncer(libovsdbOvnNBClient, controllerName) - // to make sure batching works, set it to 2 to cover number of batches = 0,1,>1 - syncer.txnBatchSize = 2 - err = syncer.SyncAddressSets() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // check results - 
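
Worth noting how references are repaired by the syncer above: a stale address set shows up in ACL, QoS and logical-router-policy matches as "$<old hashed name>", and the update ops simply swap it for "$<new hashed name>" with a plain string replacement before transacting. A minimal sketch of that step (the hashes and match strings below are made up for illustration):

```go
package main

import (
	"fmt"
	"strings"
)

// rewriteMatches swaps every "$oldHash" reference for "$newHash", the same
// strings.ReplaceAll approach the deleted getUpdateAddrSetOps applies to ACL,
// QoS and Logical_Router_Policy matches.
func rewriteMatches(matches []string, oldHash, newHash string) []string {
	out := make([]string, len(matches))
	for i, m := range matches {
		out[i] = strings.ReplaceAll(m, "$"+oldHash, "$"+newHash)
	}
	return out
}

func main() {
	// Hypothetical hashed names; real ones are produced by util.HashForOVN.
	oldHash, newHash := "a111111111", "a222222222"
	matches := []string{
		`inport == "rtos-node1" && ip4.src == $a111111111`,
		`ip4.dst == 1.2.3.4/32 && ip4.src == $a333333333`, // untouched: different set
	}
	for _, m := range rewriteMatches(matches, oldHash, newHash) {
		fmt.Println(m)
	}
}
```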
gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDbState)) -} - -func createInitialAS(name string, ips []string) *nbdb.AddressSet { - return &nbdb.AddressSet{ - UUID: name, - Name: hashedAddressSet(name), - ExternalIDs: map[string]string{ - "name": name, - }, - Addresses: ips, - } -} - -func getUpdatedAS(as *nbdb.AddressSet, dbIDs *libovsdbops.DbObjectIDs, - addressSetFactoryIpID string) *nbdb.AddressSet { - var nbdbAS *nbdb.AddressSet - if addressSetFactoryIpID == ipv4AddressSetFactoryID { - nbdbAS = buildNewAddressSet(dbIDs, ipv4AddressSetFactoryID) - } else { - nbdbAS = buildNewAddressSet(dbIDs, ipv6AddressSetFactoryID) - } - updatedAS := as.DeepCopy() - updatedAS.Name = nbdbAS.Name - - delete(updatedAS.ExternalIDs, "name") - for key, value := range nbdbAS.ExternalIDs { - updatedAS.ExternalIDs[key] = value - } - return updatedAS -} - -func hashedAddressSet(s string) string { - return util.HashForOVN(s) -} - -var _ = ginkgo.Describe("OVN Address Set Syncer", func() { - const ( - testIPv4 = "1.1.1.1" - testGatewayIPv4 = "1.2.3.4" - testIPv6 = "2001:db8::68" - testGatewayIPv6 = "2001:db8::69" - controllerName = "fake-controller" - anotherControllerName = "another-controller" - qosPriority = 1000 - ) - var syncerToBuildData = AddressSetsSyncer{ - controllerName: controllerName, - } - - ginkgo.It("destroys address sets in old non dual stack format", func() { - testData := []asSync{ - // to be removed as the format is stale, and no references exist - { - before: createInitialAS("as1", nil), - remove: true, - }, - // not to be removed, address in new dual stack format, ipv4 - { - before: createInitialAS("as1"+legacyIPv4AddressSetSuffix, nil), - after: syncerToBuildData.getNamespaceAddrSetDbIDs("as1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - // not to be removed, address in new dual stack format, ipv6 - { - before: createInitialAS("as2"+legacyIPv6AddressSetSuffix, nil), - after: syncerToBuildData.getNamespaceAddrSetDbIDs("as2"), - addressSetFactoryIPID: ipv6AddressSetFactoryID, - }, - } - testSyncerWithData(testData, nil, nil, controllerName) - }) - ginkgo.It("skips address sets owned by another controller", func() { - testData := []asSync{ - { - before: &nbdb.AddressSet{ - UUID: "as1", - Name: hashedAddressSet("as1"), - ExternalIDs: map[string]string{ - libovsdbops.OwnerControllerKey.String(): anotherControllerName, - "name": "as_name"}, - }, - leave: true, - }, - { - before: &nbdb.AddressSet{ - UUID: "as2", - Name: hashedAddressSet("as2"), - ExternalIDs: map[string]string{ - libovsdbops.OwnerControllerKey.String(): anotherControllerName, - "name": "as_name_v4"}, - }, - leave: true, - }, - } - testSyncerWithData(testData, nil, nil, controllerName) - }) - ginkgo.It("ignores stale address set with reference, no ipFamily", func() { - // no ip family - asName := hybridRoutePolicyPrefix + "node1" - hashedASName := hashedAddressSet(asName) - testData := []asSync{ - { - before: createInitialAS(asName, []string{testIPv4}), - leave: true, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASName), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1"}, - }, - } - testSyncerWithData(testData, initialDb, nil, 
controllerName) - }) - ginkgo.It("ignores stale address set with reference, no ExternalIDs[name]", func() { - hashedASName := hashedAddressSet("as1") - testData := []asSync{ - { - before: &nbdb.AddressSet{ - UUID: "as1", - Name: hashedASName, - ExternalIDs: map[string]string{ - "wrong-id": "as_name"}, - }, - leave: true, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASName), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("preserves unknown ExternalIDs", func() { - initialAS := createInitialAS(hybridRoutePolicyPrefix+"node1_v4", []string{testIPv4}) - initialAS.ExternalIDs["unknown_id"] = "id_value" - testData := []asSync{ - { - before: initialAS, - after: syncerToBuildData.getHybridRouteAddrSetDbIDs("node1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - afterTweak: func(as *nbdb.AddressSet) { - as.ExternalIDs["unknown_id"] = "id_value" - }, - }, - } - testSyncerWithData(testData, nil, nil, controllerName) - }) - // verify different address set owners - ginkgo.It("updates address set owned by HybridNodeRouteOwnerType and its references", func() { - asName := hybridRoutePolicyPrefix + "node1_v4" - hashedASName := hashedAddressSet(asName) - testData := []asSync{ - { - before: createInitialAS(asName, []string{testIPv4}), - after: syncerToBuildData.getHybridRouteAddrSetDbIDs("node1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASName), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by EgressQoSOwnerType and its references", func() { - asName := egressQoSRulePrefix + "namespace-123_v4" - hashedASName := hashedAddressSet(asName) - testData := []asSync{ - { - before: createInitialAS(asName, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "123"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalSwitch{ - UUID: "node1-UUID", - Name: "node1", - QOSRules: []string{"qos1-UUID"}, - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos1-UUID", - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by EgressQoSOwnerType and its references", func() { - // this test checks the batching works for number of address sets = 5, which is 2.5 test batch sizes. 
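
The "2.5 test batch sizes" comment above refers to the batching done by SyncAddressSets: updates are transacted in fixed-size groups (50 in production, 2 in these tests) so a single OVSDB transaction stays bounded. A simplified stand-in for that batching helper, not its actual implementation:

```go
package main

import "fmt"

// inBatches hands the callback at most batchSize items at a time, stopping at
// the first error - a simplified stand-in for the batching helper used by
// SyncAddressSets.
func inBatches[T any](batchSize int, items []T, fn func(batch []T) error) error {
	for start := 0; start < len(items); start += batchSize {
		end := start + batchSize
		if end > len(items) {
			end = len(items)
		}
		if err := fn(items[start:end]); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	staleSets := []string{"as1", "as2", "as3", "as4", "as5"}
	// With five stale sets and a batch size of two, three transactions are issued.
	_ = inBatches(2, staleSets, func(batch []string) error {
		fmt.Println("transact:", batch)
		return nil
	})
}
```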
- asName1 := egressQoSRulePrefix + "namespace-1_v4" - hashedASName1 := hashedAddressSet(asName1) - asName2 := egressQoSRulePrefix + "namespace-2_v4" - hashedASName2 := hashedAddressSet(asName2) - asName3 := egressQoSRulePrefix + "namespace-3_v4" - hashedASName3 := hashedAddressSet(asName3) - asName4 := egressQoSRulePrefix + "namespace-4_v4" - hashedASName4 := hashedAddressSet(asName4) - asName5 := egressQoSRulePrefix + "namespace-5_v4" - hashedASName5 := hashedAddressSet(asName5) - testData := []asSync{ - { - before: createInitialAS(asName1, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName2, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "2"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName3, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "3"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName4, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "4"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName5, []string{testIPv4}), - after: syncerToBuildData.getEgressQosAddrSetDbIDs("namespace", "5"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName1, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos1-UUID", - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName2, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos2-UUID", - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName3, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos3-UUID", - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName4, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos4-UUID", - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName5, - Priority: qosPriority, - Action: map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos5-UUID", - }, - &nbdb.LogicalSwitch{ - UUID: "node1-UUID", - Name: "node1", - QOSRules: []string{"qos1-UUID", "qos2-UUID", "qos3-UUID", "qos4-UUID", "qos5-UUID"}, - }} - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by EgressFirewallDNSOwnerType and its references", func() { - asName := "dns.name_v4" - hashedASName := hashedAddressSet(asName) - testData := []asSync{ - { - before: createInitialAS(asName, []string{testIPv4}), - after: syncerToBuildData.getEgressFirewallDNSAddrSetDbIDs("dns.name"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - acl := libovsdbops.BuildACL( - "aclName", - 
nbdb.ACLDirectionToLport, - types.EgressFirewallStartPriority, - "(ip4.dst == 1.2.3.4/32) && ip4.src == $namespaceAS && ip4.dst == $"+hashedASName, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{egressFirewallACLExtIdKey: "egressfirewall1"}, - nil, - types.PrimaryACLTier, - ) - acl.UUID = "acl-UUID" - initialDb := []libovsdbtest.TestData{ - acl, - &nbdb.LogicalSwitch{ - UUID: "node1-UUID", - Name: "node1", - ACLs: []string{acl.UUID}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by NetworkPolicyOwnerType and its references", func() { - asNameEgress := "namespace.netpol.egress.0_v4" - hashedASNameasNameEgress := hashedAddressSet(asNameEgress) - asNameIngress := "namespace.netpol.ingress.0_v4" - hashedASNameasNameIngress := hashedAddressSet(asNameIngress) - testData := []asSync{ - { - before: createInitialAS(asNameEgress, []string{testIPv4}), - after: syncerToBuildData.getNetpolAddrSetDbIDs("namespace", "netpol", "egress", "0"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asNameIngress, []string{"1.1.1.2"}), - after: syncerToBuildData.getNetpolAddrSetDbIDs("namespace", "netpol", "ingress", "0"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - acl1 := libovsdbops.BuildACL( - "acl1", - nbdb.ACLDirectionFromLport, - types.EgressFirewallStartPriority, - fmt.Sprintf("ip4.src == {$%s} && outport == @a13757631697825269621", hashedASNameasNameEgress), - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - nil, - map[string]string{ - "apply-after-lb": "true", - }, - types.PrimaryACLTier, - ) - acl1.UUID = "acl1-UUID" - acl2 := libovsdbops.BuildACL( - "acl2", - nbdb.ACLDirectionToLport, - types.EgressFirewallStartPriority, - fmt.Sprintf("inport == @a13757631697825269621 && ip.dst == {$%s}", hashedASNameasNameIngress), - nbdb.ACLActionAllowRelated, - types.OvnACLLoggingMeter, - "", - false, - nil, - nil, - types.PrimaryACLTier, - ) - acl2.UUID = "acl2-UUID" - initialDb := []libovsdbtest.TestData{ - acl1, - acl2, - &nbdb.LogicalSwitch{ - UUID: "node1-UUID", - Name: "node1", - ACLs: []string{acl1.UUID, acl2.UUID}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by NamespaceOwnerType and its references", func() { - asName := "namespace_v4" - hashedASName := hashedAddressSet(asName) - testData := []asSync{ - { - before: createInitialAS(asName, []string{testIPv4}), - after: syncerToBuildData.getNamespaceAddrSetDbIDs("namespace"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - // namespace-owned address set may be referenced from different objects, - // test lrp, acl and qos - acl := libovsdbops.BuildACL( - "aclName", - nbdb.ACLDirectionToLport, - types.EgressFirewallStartPriority, - "ip4.src == $"+hashedASName, - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - map[string]string{egressFirewallACLExtIdKey: "egressfirewall1"}, - nil, - types.PrimaryACLTier, - ) - acl.UUID = "acl-UUID" - initialDb := []libovsdbtest.TestData{ - acl, - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: "ip4.src == $" + hashedASName, - }, - &nbdb.QoS{ - Direction: nbdb.QoSDirectionToLport, - Match: "(ip4.dst == 1.2.3.4/32) && ip4.src == $" + hashedASName, - Priority: qosPriority, - Action: 
map[string]int{nbdb.QoSActionDSCP: 40}, - ExternalIDs: map[string]string{"EgressQoS": "namespace"}, - UUID: "qos1-UUID", - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1"}, - }, - &nbdb.LogicalSwitch{ - UUID: "node1-UUID", - Name: "node1", - ACLs: []string{acl.UUID}, - QOSRules: []string{"qos1-UUID"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set owned by EgressIP and EgressSVC and its references", func() { - asName1 := egressIPServedPods + legacyIPv4AddressSetSuffix - hashedASName1 := hashedAddressSet(asName1) - asName2 := clusterNodeIP + legacyIPv4AddressSetSuffix - hashedASName2 := hashedAddressSet(asName2) - asName3 := egressServiceServedPods + legacyIPv4AddressSetSuffix - hashedASName3 := hashedAddressSet(asName3) - testData := []asSync{ - { - before: createInitialAS(asName1, []string{testIPv4}), - after: syncerToBuildData.getEgressIPAddrSetDbIDs(egressIPServedPodsAddrSetName, "default"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName2, []string{testIPv4}), - after: syncerToBuildData.getEgressIPAddrSetDbIDs(nodeIPAddrSetName, "default"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asName3, []string{testIPv4}), - after: syncerToBuildData.getEgressServiceAddrSetDbIDs(), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", - hashedASName1, hashedASName3, hashedASName2), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-node-UUID", - Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"default-no-reroute-node-UUID"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set and its references for both ip families", func() { - // the difference from "updates address sets owned by HybridNodeRouteOwnerType and its references" test - // is that there are 2 address sets for ipv4 and ipv6 - asNamev4 := hybridRoutePolicyPrefix + "node1_v4" - hashedASNamev4 := hashedAddressSet(asNamev4) - asNamev6 := hybridRoutePolicyPrefix + "node1_v6" - hashedASNamev6 := hashedAddressSet(asNamev6) - testData := []asSync{ - { - before: createInitialAS(asNamev4, []string{testIPv4}), - after: syncerToBuildData.getHybridRouteAddrSetDbIDs("node1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asNamev6, []string{testIPv6}), - after: syncerToBuildData.getHybridRouteAddrSetDbIDs("node1"), - addressSetFactoryIPID: ipv6AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASNamev4), - }, - &nbdb.LogicalRouterPolicy{ - UUID: "lrp2", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv6}, - Match: fmt.Sprintf(`inport == "%s%s" && ip6.src == $%s`, types.RouterToSwitchPrefix, 
"node1", hashedASNamev6), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1", "lrp2"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) - ginkgo.It("updates address set and its references for new dualstack format, and ignores a stale one", func() { - // the difference from "updates address sets owned by HybridNodeRouteOwnerType and its references" test - // is that there 2 address sets for ipv4 and old non-dualstack format. - // a new one should be updated, and a stale one should be ignored - asNamev4 := hybridRoutePolicyPrefix + "node1_v4" - hashedASNamev4 := hashedAddressSet(asNamev4) - asNameOld := hybridRoutePolicyPrefix + "node1" - hashedASNameOld := hashedAddressSet(asNameOld) - testData := []asSync{ - { - before: createInitialAS(asNamev4, []string{testIPv4}), - after: syncerToBuildData.getHybridRouteAddrSetDbIDs("node1"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: createInitialAS(asNameOld, []string{testIPv4}), - leave: true, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASNamev4), - }, - &nbdb.LogicalRouterPolicy{ - UUID: "lrp2", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASNameOld), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1", "lrp2"}, - }, - } - newHashedASNamev4 := buildNewAddressSet(testData[0].after, ipv4AddressSetFactoryID).Name - testSyncerWithData(testData, initialDb, []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "lrp1", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", newHashedASNamev4), - }, - &nbdb.LogicalRouterPolicy{ - UUID: "lrp2", - Priority: types.HybridOverlayReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{testGatewayIPv4}, - Match: fmt.Sprintf(`inport == "%s%s" && ip4.src == $%s`, types.RouterToSwitchPrefix, "node1", hashedASNameOld), - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"lrp1", "lrp2"}, - }, - }, controllerName) - }) - - ginkgo.It("updates referencing object if at least one address set was updated", func() { - // address set without ip family and ips, will be ignored - asName1 := egressIPServedPods - hashedASName1 := hashedAddressSet(asName1) - asName2 := clusterNodeIP + legacyIPv4AddressSetSuffix - hashedASName2 := hashedAddressSet(asName2) - asName3 := egressServiceServedPods + legacyIPv4AddressSetSuffix - hashedASName3 := hashedAddressSet(asName3) - testData := []asSync{ - { - before: createInitialAS(asName1, []string{}), - leave: true, - }, - { - before: createInitialAS(asName2, []string{testIPv4}), - after: syncerToBuildData.getEgressIPAddrSetDbIDs(nodeIPAddrSetName, "default"), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - { - before: 
createInitialAS(asName3, []string{testIPv4}), - after: syncerToBuildData.getEgressServiceAddrSetDbIDs(), - addressSetFactoryIPID: ipv4AddressSetFactoryID, - }, - } - initialDb := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", - hashedASName1, hashedASName3, hashedASName2), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-node-UUID", - Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, - }, - &nbdb.LogicalRouter{ - UUID: types.OVNClusterRouter + "-UUID", - Name: types.OVNClusterRouter, - Policies: []string{"default-no-reroute-node-UUID"}, - }, - } - testSyncerWithData(testData, initialDb, nil, controllerName) - }) -}) diff --git a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_gorup_suite_test.go b/go-controller/pkg/ovn/external_ids_syncer/port_group/port_gorup_suite_test.go deleted file mode 100644 index be926c3fa2..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_gorup_suite_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package port_group - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -func TestPortGroup(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "PortGroup Suite") -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync.go b/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync.go deleted file mode 100644 index 16fc21d023..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync.go +++ /dev/null @@ -1,362 +0,0 @@ -package port_group - -import ( - "fmt" - "regexp" - "strings" - - "k8s.io/klog/v2" - - libovsdbclient "github.com/ovn-org/libovsdb/client" - "github.com/ovn-org/libovsdb/ovsdb" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" -) - -const ( - // port groups suffixes - // ingressDefaultDenySuffix is the suffix used when creating the ingress port group for a namespace - ingressDefaultDenySuffix = "ingressDefaultDeny" - // egressDefaultDenySuffix is the suffix used when creating the ingress port group for a namespace - egressDefaultDenySuffix = "egressDefaultDeny" - defaultNetworkControllerName = "default-network-controller" - // values for libovsdbops.PolicyDirectionKey - // We don't reuse any external constants in this package, as it should update db entries from a - // pre-defined format. If in the future values for some external constants change, this package shouldn't be affected. - policyDirectionIngress = "Ingress" - policyDirectionEgress = "Egress" -) - -type updatePortGroupInfo struct { - acls []*nbdb.ACL - oldPG *nbdb.PortGroup - newPG *nbdb.PortGroup -} - -type PortGroupSyncer struct { - nbClient libovsdbclient.Client - getPGWeight func(acls, ports int) float64 -} - -// getPGWeight returns weight of a port group based on the number of linked acls and ports. -// port group syncer updates both port groups and related acls, transaction time depends on the number of acls related to every port group. -// The concept of "weight" reflects that port group with 1 ACL and port group with 100 ACLs will have different -// transaction time. 
Based on local testing, the results are -// Update for 30`000 port groups with 1 ACL each takes 7,14 sec -// here almost linear dependency begins -// Update for 5`000 port groups with 10 ACL each takes 6 sec -// Update for 500 port groups with 100 ACL each takes 5,95 sec -// Update for 50 port groups with 1000 ACL each takes 5,84 sec -// Update for 5 port groups with 10000 ACL each takes 5,42 sec -// Considering given times safe within 10 second timeout, 5 port groups with 10000 ACLs may be updated in one batch. -// That makes a weight of port group with 10000 ACLs = 1/5. -// The number of ports in a port group also affects the weight. By adding a given number of ports in the local testing -// we got the following extra transaction time (o ports = 0 extra time): -// 50 pg + 1000 ACL: -// -// 1K ports => 0.69 s -// 5K ports => 2.89 s -// 10K ports => 6.85 s -// -// 500 pg + 100 ACL: -// -// 1K ports => 5.27 s -// 5K ports => 30.39 s -// 10K ports => 60.5 s -// -// 5000 pg + 10 ACL: -// -// 100 ports => 5.9 s -// 200 ports => 16.2 s -// 1K ports => 58.5 s -// -// Considering 7 seconds is a safe transaction time, the following approximation may be applied: -// 1000 ports * 500 pg <= 7 seconds extra (weight = 1) -// => 1 port extra weight = 1/(500 * 1000) -func getPGWeight(acls, ports int) float64 { - portsWeight := float64(ports) / 500000 - - if acls < 10 { - // extra coefficient for a small number of acls - if acls == 0 { - // updating a port group without acls may be estimated with a port group with 1 acl update time - acls = 1 - } - return portsWeight + float64(acls)/30000 - } - // for acls > 10, use linear dependency - return portsWeight + float64(acls)/50000 -} - -func getControllerName(networkExternalID string) string { - if networkExternalID == "" { - return defaultNetworkControllerName - } - return networkExternalID + "-network-controller" -} - -// NewPortGroupSyncer creates a PortGroupSyncer that will sync port groups without the new ExternalIDs for all -// controllers. Controller name will be defined based on getControllerName() function. 
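
Plugging numbers into the weight formula documented above makes the batching concrete: a port group with 10000 referencing ACLs weighs 0.2, so roughly five of them fill the per-transaction budget of 1 enforced later in SyncPortGroups. A quick standalone calculation reusing the formula as written:

```go
package main

import "fmt"

// pgWeight reproduces the heuristic documented above: referencing ACLs and
// member ports both add to a port group's update cost, and a batch is
// transacted before its accumulated weight exceeds 1.
func pgWeight(acls, ports int) float64 {
	portsWeight := float64(ports) / 500000
	if acls < 10 {
		if acls == 0 {
			// a port group without ACLs costs about as much as one with a single ACL
			acls = 1
		}
		return portsWeight + float64(acls)/30000
	}
	return portsWeight + float64(acls)/50000
}

func main() {
	fmt.Printf("10000 ACLs,    0 ports: %.6f (about 5 per transaction)\n", pgWeight(10000, 0))
	fmt.Printf("  100 ACLs, 1000 ports: %.6f\n", pgWeight(100, 1000))
	fmt.Printf("    1 ACL,     0 ports: %.6f (tens of thousands per transaction)\n", pgWeight(1, 0))
}
```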
-func NewPortGroupSyncer(nbClient libovsdbclient.Client) *PortGroupSyncer { - return &PortGroupSyncer{ - nbClient: nbClient, - getPGWeight: getPGWeight, - } -} - -func getPortGroupNamespaceDbIDs(namespace, networkExternalID string) *libovsdbops.DbObjectIDs { - controllerName := getControllerName(networkExternalID) - return libovsdbops.NewDbObjectIDs(libovsdbops.PortGroupNamespace, controllerName, map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: namespace, - }) -} - -func getPortGroupNetpolNamespaceDbIDs(namespace, direction, networkExternalID string) *libovsdbops.DbObjectIDs { - controllerName := getControllerName(networkExternalID) - return libovsdbops.NewDbObjectIDs(libovsdbops.PortGroupNetpolNamespace, controllerName, map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: namespace, - libovsdbops.PolicyDirectionKey: direction, - }) -} - -func getPortGroupNetworkPolicyDbIDs(policyNamespace, policyName, networkExternalID string) *libovsdbops.DbObjectIDs { - controllerName := getControllerName(networkExternalID) - return libovsdbops.NewDbObjectIDs(libovsdbops.PortGroupNetworkPolicy, controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: fmt.Sprintf("%s:%s", policyNamespace, policyName), - }) -} - -func getPortGroupAdminNetworkPolicyDbIDs(anpName string, isBanp bool, networkExternalID string) *libovsdbops.DbObjectIDs { - controllerName := getControllerName(networkExternalID) - idsType := libovsdbops.PortGroupAdminNetworkPolicy - if isBanp { - idsType = libovsdbops.PortGroupBaselineAdminNetworkPolicy - } - return libovsdbops.NewDbObjectIDs(idsType, controllerName, - map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: anpName, - }) -} - -func getPortGroupClusterDbIDs(baseName, networkExternalID string) *libovsdbops.DbObjectIDs { - controllerName := getControllerName(networkExternalID) - return libovsdbops.NewDbObjectIDs(libovsdbops.PortGroupCluster, controllerName, map[libovsdbops.ExternalIDKey]string{ - libovsdbops.ObjectNameKey: baseName, - }) -} - -// getReferencingObjsAndNewDbIDs finds all object that reference stale port group and tries to create a new dbIDs -// based on referencing objects -func (syncer *PortGroupSyncer) getReferencingObjsAndNewDbIDs(oldHash, oldName, networkExternalID string, referencingACLUUIDs []string) (acls []*nbdb.ACL, - dbIDs *libovsdbops.DbObjectIDs, err error) { - // get all referencing objects - refACLs := []*nbdb.ACL{} - for _, aclUUID := range referencingACLUUIDs { - refACLs = append(refACLs, &nbdb.ACL{UUID: aclUUID}) - } - acls, err = libovsdbops.FindACLs(syncer.nbClient, refACLs) - if err != nil { - err = fmt.Errorf("failed to find acls for port group %s: %v", oldHash, err) - return - } - // build dbIDs - switch { - // Filter port groups with pre-defined names - case oldName == types.ClusterPortGroupNameBase || oldName == types.ClusterRtrPortGroupNameBase: - dbIDs = getPortGroupClusterDbIDs(oldName, networkExternalID) - case strings.HasPrefix(oldName, "ANP:"): - // ANP owned port group - dbIDs = getPortGroupAdminNetworkPolicyDbIDs(strings.TrimPrefix(oldName, "ANP:"), false, networkExternalID) - case strings.HasPrefix(oldName, "BANP:"): - // ANP owned port group - dbIDs = getPortGroupAdminNetworkPolicyDbIDs(strings.TrimPrefix(oldName, "BANP:"), true, networkExternalID) - case strings.Contains(oldName, "_"): - // network policy owned namespace - s := strings.SplitN(oldName, "_", 2) - if s[1] == ingressDefaultDenySuffix || s[1] == egressDefaultDenySuffix { - // default deny port group, 
name format = hash(namespace)_gressSuffix - // need to find unhashed namespace name, use referencing ACLs - if len(acls) == 0 { - // default deny port group should always have acls - err = fmt.Errorf("defaultDeny port group doesn't have any referencing ACLs, can't extract namespace") - return - } - // all default deny acls will have the same namespace as ExternalID - acl := acls[0] - namespace := acl.ExternalIDs[libovsdbops.ObjectNameKey.String()] - var direction string - if s[1] == ingressDefaultDenySuffix { - direction = policyDirectionIngress - } else { - direction = policyDirectionEgress - } - dbIDs = getPortGroupNetpolNamespaceDbIDs(namespace, direction, networkExternalID) - } else { - // s[0]=policyNamespace, s[1]=policyName - dbIDs = getPortGroupNetworkPolicyDbIDs(s[0], s[1], networkExternalID) - } - default: - // namespace port group name is just namespace - dbIDs = getPortGroupNamespaceDbIDs(oldName, networkExternalID) - } - // dbIDs is set - return -} - -func (syncer *PortGroupSyncer) getUpdatePortGroupOps(portGroupInfos []*updatePortGroupInfo) (ops []ovsdb.Operation, err error) { - // one referencing object may contain multiple references that need to be updated - // these maps are used to track referenced that need to be replaced for every object type - aclsToUpdate := map[string]*nbdb.ACL{} - - for _, portGroupInfo := range portGroupInfos { - oldName := portGroupInfo.oldPG.ExternalIDs["name"] - // create updated port group - ops, err = libovsdbops.CreateOrUpdatePortGroupsOps(syncer.nbClient, ops, portGroupInfo.newPG) - if err != nil { - return nil, fmt.Errorf("failed to get update port group ops for port group %s: %v", oldName, err) - } - // delete old port group - ops, err = libovsdbops.DeletePortGroupsOps(syncer.nbClient, ops, portGroupInfo.oldPG.Name) - if err != nil { - return nil, fmt.Errorf("failed to get update port group ops for port group %s: %v", oldName, err) - } - oldHash := "@" + portGroupInfo.oldPG.Name - newHash := "@" + portGroupInfo.newPG.Name - - for _, acl := range portGroupInfo.acls { - if _, ok := aclsToUpdate[acl.UUID]; !ok { - aclsToUpdate[acl.UUID] = acl - } - aclsToUpdate[acl.UUID].Match = strings.ReplaceAll(aclsToUpdate[acl.UUID].Match, oldHash, newHash) - } - } - - for _, acl := range aclsToUpdate { - ops, err = libovsdbops.UpdateACLsOps(syncer.nbClient, ops, acl) - if err != nil { - return nil, fmt.Errorf("failed to get update acl ops: %v", err) - } - } - - return -} - -// getPortGroupUpdateInfo adds db ops to update port group and objects that reference it -func (syncer *PortGroupSyncer) getPortGroupUpdateInfo(pg *nbdb.PortGroup, pgToACLs map[string][]string) (*updatePortGroupInfo, error) { - pgName := pg.ExternalIDs["name"] - networkExternalID := pg.ExternalIDs[types.NetworkExternalID] - if pgName == "" { - return nil, fmt.Errorf("port group doesn't have expected ExternalID[\"name\"]") - } - - acls, dbIDs, err := syncer.getReferencingObjsAndNewDbIDs(pg.Name, pgName, networkExternalID, pgToACLs[pg.Name]) - if err != nil { - return nil, fmt.Errorf("failed to get new dbIDs for port group %s: %v", pg.ExternalIDs["name"], err) - } - - // since we need to update portGroup.Name, which is an index and not listed in getAllUpdatableFields, - // we copy existing portGroup, update the required fields, and replace (delete and create) existing port group with the updated - newPG := pg.DeepCopy() - // reset UUID - newPG.UUID = "" - // update port group Name and ExternalIDs exactly the same way as in libovsdbops.BuildPortGroup - newPG.Name = 
libovsdbutil.GetPortGroupName(dbIDs) - newPG.ExternalIDs = dbIDs.GetExternalIDs() - return &updatePortGroupInfo{acls, pg, newPG}, nil -} - -func getPGNamesFromMatch(match string) []string { - pgs := []string{} - pgreg := regexp.MustCompile("@([a-zA-Z_.][a-zA-Z_.0-9]*)") - for res := pgreg.FindStringIndex(match); res != nil; res = pgreg.FindStringIndex(match) { - pgName := match[res[0]+1 : res[1]] - pgs = append(pgs, pgName) - match = match[res[1]:] - } - return pgs -} - -func (syncer *PortGroupSyncer) getReferencingACLs() (map[string][]string, error) { - pgToACLUUIDs := map[string][]string{} - _, err := libovsdbops.FindACLsWithPredicate(syncer.nbClient, func(acl *nbdb.ACL) bool { - aclPGs := getPGNamesFromMatch(acl.Match) - for _, pgName := range aclPGs { - pgToACLUUIDs[pgName] = append(pgToACLUUIDs[pgName], acl.UUID) - } - return false - }) - if err != nil { - return nil, err - } - return pgToACLUUIDs, nil -} - -// SyncPortGroups must be run after ACLs sync, since it uses new ACL.ExternalIDs -func (syncer *PortGroupSyncer) SyncPortGroups() error { - // stale port groups don't have controller ID - portGroupList, err := libovsdbops.FindPortGroupsWithPredicate(syncer.nbClient, libovsdbops.GetNoOwnerPredicate[*nbdb.PortGroup]()) - if err != nil { - return fmt.Errorf("failed to find stale port groups: %v", err) - } - - pgToACLs, err := syncer.getReferencingACLs() - if err != nil { - return fmt.Errorf("failed to find referencing acls: %v", err) - } - - klog.Infof("SyncPortGroups found %d stale port groups", len(portGroupList)) - - opsWeight := 0.0 - pgUpdateInfos := []*updatePortGroupInfo{} - i := 0 - - transact := func() error { - ops, err := syncer.getUpdatePortGroupOps(pgUpdateInfos) - if err != nil { - return fmt.Errorf("failed to get update port groups ops: %w", err) - } - _, err = libovsdbops.TransactAndCheck(syncer.nbClient, ops) - if err != nil { - return fmt.Errorf("failed to transact port group sync ops: %v", err) - } - opsWeight = 0.0 - pgUpdateInfos = []*updatePortGroupInfo{} - return nil - } - - for i < len(portGroupList) { - pgUpdateWeight := syncer.getPGWeight(len(portGroupList[i].ACLs), len(portGroupList[i].Ports)) - // This port group would overcome the maximum operation weight of 1 - // so transact all the previously accumulated ops first if any. - if opsWeight+pgUpdateWeight > 1 { - // time to transact - if err = transact(); err != nil { - return err - } - } - - // since the same acl may be affected by multiple port group updates, it is important to call - // getPortGroupUpdateInfo, which captures acls, after transact. - // Otherwise, transact may change acl that was captured by getPortGroupUpdateInfo and the following - // update will override the changes. 
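
getPGNamesFromMatch above scans an ACL match with the pattern @([a-zA-Z_.][a-zA-Z_.0-9]*) to collect every referenced port group; the deleted code walks FindStringIndex in a loop, but the same extraction can be shown more compactly with FindAllStringSubmatch (the sample match below is hypothetical):

```go
package main

import (
	"fmt"
	"regexp"
)

// portGroupsInMatch extracts every "@<port_group>" reference from an OVN
// match expression, using the same pattern as the deleted getPGNamesFromMatch.
func portGroupsInMatch(match string) []string {
	re := regexp.MustCompile(`@([a-zA-Z_.][a-zA-Z_.0-9]*)`)
	var names []string
	for _, m := range re.FindAllStringSubmatch(match, -1) {
		names = append(names, m[1])
	}
	return names
}

func main() {
	// Hypothetical ACL match referencing two port groups and one address set.
	match := `inport == @a123_ingressDefaultDeny && outport == @clusterPortGroup && ip4.src == $someAddressSet`
	fmt.Println(portGroupsInMatch(match)) // [a123_ingressDefaultDeny clusterPortGroup]
}
```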
- updateInfo, err := syncer.getPortGroupUpdateInfo(portGroupList[i], pgToACLs) - if err != nil { - return err - } - - opsWeight += pgUpdateWeight - pgUpdateInfos = append(pgUpdateInfos, updateInfo) - i++ - } - if len(pgUpdateInfos) > 0 { - // end of iteration, transact what is left - if err = transact(); err != nil { - return err - } - } - return nil -} diff --git a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync_test.go b/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync_test.go deleted file mode 100644 index 5d0e5e6f25..0000000000 --- a/go-controller/pkg/ovn/external_ids_syncer/port_group/port_group_sync_test.go +++ /dev/null @@ -1,303 +0,0 @@ -package port_group - -import ( - "fmt" - "strings" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - - libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" - libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" - libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" -) - -type pgSync struct { - before *nbdb.PortGroup - after *libovsdbops.DbObjectIDs - afterTweak func(group *nbdb.PortGroup) - remove bool - leave bool -} - -// data is used to pass port group for initial and expected db state, initialDbState may be used to add objects -// of other types to the initial db state, and finalDbState may be used to set the expected state of objects -// passed in initialDbState. If finalDbState is nil, final state will be updated automatically by changing port group -// references for initial objects from initialDbState. -func testSyncerWithData(data []pgSync, initialDbState, finalDbState []libovsdbtest.TestData) { - // create initial db setup - var fakePortUUID string - var dbPortAndSwitch []libovsdbtest.TestData - - dbSetup := libovsdbtest.TestSetup{NBData: initialDbState} - for _, pgSync := range data { - dbSetup.NBData = append(dbSetup.NBData, pgSync.before) - if len(pgSync.before.Ports) > 0 { - fakePortUUID = pgSync.before.Ports[0] - } - } - if fakePortUUID != "" { - dbPort := &nbdb.LogicalSwitchPort{ - UUID: fakePortUUID, - Name: "fake-port", - } - - dbSwitch := &nbdb.LogicalSwitch{ - UUID: "fake-switch", - Name: "fake-switch", - Ports: []string{fakePortUUID}, - } - dbPortAndSwitch = []libovsdbtest.TestData{dbPort, dbSwitch} - dbSetup.NBData = append(dbSetup.NBData, dbPortAndSwitch...) - } - - libovsdbOvnNBClient, _, libovsdbCleanup, err := libovsdbtest.NewNBSBTestHarness(dbSetup) - defer libovsdbCleanup.Cleanup() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // create expected data using addressSetFactory - expectedDbState := initialDbState - if finalDbState != nil { - expectedDbState = finalDbState - } - if fakePortUUID != "" { - expectedDbState = append(expectedDbState, dbPortAndSwitch...) 
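
Summarizing the SyncPortGroups loop shown earlier in this hunk: per-port-group weights are accumulated and the pending updates are transacted whenever the next one would push the running total past 1. Stripped of the OVSDB details, the control flow is roughly the following (the item type, weights and transact callback are placeholders):

```go
package main

import "fmt"

// flushByWeight batches items so that each flush stays under the weight
// budget of 1, mirroring the shape of the deleted SyncPortGroups loop.
func flushByWeight(weights []float64, transact func(batch []int) error) error {
	var batch []int
	total := 0.0
	for i, w := range weights {
		if total+w > 1 && len(batch) > 0 {
			if err := transact(batch); err != nil {
				return err
			}
			batch, total = nil, 0
		}
		batch = append(batch, i)
		total += w
	}
	if len(batch) > 0 {
		return transact(batch)
	}
	return nil
}

func main() {
	// Three heavy port groups (0.5 each) and two light ones: expect the
	// batches {0,1} and {2,3,4}.
	weights := []float64{0.5, 0.5, 0.5, 0.05, 0.05}
	_ = flushByWeight(weights, func(batch []int) error {
		fmt.Println("transact port groups:", batch)
		return nil
	})
}
```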
- } - - for _, pgSync := range data { - if pgSync.remove { - continue - } - if pgSync.leave { - expectedDbState = append(expectedDbState, pgSync.before) - } else if pgSync.after != nil { - updatedPG := getUpdatedPG(pgSync.before, pgSync.after) - if pgSync.afterTweak != nil { - pgSync.afterTweak(updatedPG) - } - expectedDbState = append(expectedDbState, updatedPG) - if finalDbState == nil { - for _, dbObj := range expectedDbState { - if acl, ok := dbObj.(*nbdb.ACL); ok { - acl.Match = strings.ReplaceAll(acl.Match, "@"+pgSync.before.Name, "@"+updatedPG.Name) - } - } - } - } - } - // run sync - syncer := NewPortGroupSyncer(libovsdbOvnNBClient) - // to make sure batching works, set it to 0.5 to cover number of batches = 0,1,>1 - syncer.getPGWeight = func(_, _ int) float64 { - return 0.5 - } - err = syncer.SyncPortGroups() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // check results - gomega.Eventually(libovsdbOvnNBClient).Should(libovsdbtest.HaveData(expectedDbState)) -} - -func createInitialPG(hashedName, name, networkName string, portUUIDs, aclUUIDs []string) *nbdb.PortGroup { - externalIDs := map[string]string{"name": name} - if networkName != "" { - externalIDs[types.NetworkExternalID] = networkName - } - return &nbdb.PortGroup{ - UUID: hashedName, - Name: hashedName, - ExternalIDs: externalIDs, - Ports: portUUIDs, - ACLs: aclUUIDs, - } -} - -func createReferencingACL(hashedName string, externalIDs map[string]string) *nbdb.ACL { - acl := libovsdbops.BuildACL( - "", - nbdb.ACLDirectionToLport, - types.EgressFirewallStartPriority, - "outport == @"+hashedName+" && ip4.src == $namespaceAS", - nbdb.ACLActionAllow, - types.OvnACLLoggingMeter, - "", - false, - externalIDs, - nil, - types.DefaultACLTier, - ) - acl.UUID = hashedName + "-UUID" - return acl -} - -func getUpdatedPG(pg *nbdb.PortGroup, dbIDs *libovsdbops.DbObjectIDs) *nbdb.PortGroup { - newPG := pg.DeepCopy() - newPG.UUID += "-new" - - newPG.Name = libovsdbutil.GetPortGroupName(dbIDs) - newPG.ExternalIDs = dbIDs.GetExternalIDs() - return newPG -} - -func hashedPG(s string) string { - return util.HashForOVN(s) -} - -func getNetworkScopedName(netName, name string) string { - if netName == "" { - return name - } - return fmt.Sprintf("%s%s", util.GetSecondaryNetworkPrefix(netName), name) -} - -var _ = ginkgo.Describe("OVN Port Group Syncer", func() { - const ( - defaultNetworkExternalID = "" - anotherControllerName = "another-controller" - fakePortUUID = "portUUID" - secondaryNetworkExternalID = "secondary" - ) - - ginkgo.It("skips port groups with owner", func() { - testData := []pgSync{ - { - before: &nbdb.PortGroup{ - UUID: "pg1", - Name: hashedPG("as1"), - ExternalIDs: map[string]string{ - libovsdbops.OwnerControllerKey.String(): anotherControllerName, - "name": "pg_name"}, - }, - leave: true, - }, - } - testSyncerWithData(testData, nil, nil) - }) - // Cluster port groups are only created by the Default Controller at this point - ginkgo.It("updates port group owned by ClusterOwnerType and its references", func() { - acl1 := createReferencingACL(types.ClusterPortGroupNameBase, nil) - acl2 := createReferencingACL(types.ClusterRtrPortGroupNameBase, nil) - testData := []pgSync{ - { - before: createInitialPG(types.ClusterPortGroupNameBase, types.ClusterPortGroupNameBase, defaultNetworkExternalID, - []string{fakePortUUID}, []string{acl1.UUID}), - after: getPortGroupClusterDbIDs(types.ClusterPortGroupNameBase, defaultNetworkExternalID), - }, - { - before: createInitialPG(types.ClusterRtrPortGroupNameBase, 
types.ClusterRtrPortGroupNameBase, defaultNetworkExternalID, - []string{fakePortUUID}, []string{acl2.UUID}), - after: getPortGroupClusterDbIDs(types.ClusterRtrPortGroupNameBase, defaultNetworkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl1, acl2} - testSyncerWithData(testData, initialDb, nil) - }) - // ANP/BANP port groups are only created by the Default Controller at this point - ginkgo.It("updates port group owned by AdminNetworkPolicy and its references", func() { - policyName := "test-anp" - readableName := "ANP:" + policyName - pgName := hashedPG("ANP:" + policyName) - acl := createReferencingACL(pgName, nil) - testData := []pgSync{ - { - before: createInitialPG(pgName, readableName, defaultNetworkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupAdminNetworkPolicyDbIDs(policyName, false, defaultNetworkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl} - testSyncerWithData(testData, initialDb, nil) - }) - ginkgo.It("updates port group owned by BaselineAdminNetworkPolicy and its references", func() { - policyName := "test-banp" - readableName := "BANP:" + policyName - pgName := hashedPG("BANP:" + policyName) - acl := createReferencingACL(pgName, nil) - testData := []pgSync{ - { - before: createInitialPG(pgName, readableName, defaultNetworkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupAdminNetworkPolicyDbIDs(policyName, true, defaultNetworkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl} - testSyncerWithData(testData, initialDb, nil) - }) - // port groups that exist both for the Default and Secondary controller - for _, networkExternalID := range []string{defaultNetworkExternalID, secondaryNetworkExternalID} { - networkExternalID := networkExternalID - // verify different port group owners - ginkgo.It(fmt.Sprintf("updates port group owned by NamespaceOwnerType and its references, network %s", networkExternalID), func() { - namespaceName := "namespace" - pgName := hashedPG(getNetworkScopedName(networkExternalID, namespaceName)) - acl := createReferencingACL(pgName, nil) - testData := []pgSync{ - { - before: createInitialPG(pgName, namespaceName, networkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupNamespaceDbIDs(namespaceName, networkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl} - testSyncerWithData(testData, initialDb, nil) - }) - ginkgo.It(fmt.Sprintf("updates port group owned by NetpolNamespaceOwnerType and its references, network %s", networkExternalID), func() { - namespaceName := "namespace" - pgName := hashedPG(getNetworkScopedName(networkExternalID, namespaceName)) + "_" + egressDefaultDenySuffix - // default deny port group's namespace is extracted from the referencing acl - acl := createReferencingACL(pgName, map[string]string{ - libovsdbops.ObjectNameKey.String(): namespaceName, - }) - testData := []pgSync{ - { - before: createInitialPG(pgName, pgName, networkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupNetpolNamespaceDbIDs(namespaceName, "Egress", networkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl} - testSyncerWithData(testData, initialDb, nil) - }) - ginkgo.It(fmt.Sprintf("updates port group owned by NetworkPolicyOwnerType and its references, network %s", networkExternalID), func() { - namespaceName := "namespace" - policyName := "netpol" - readableName := fmt.Sprintf("%s_%s", namespaceName, policyName) - pgName := 
hashedPG(getNetworkScopedName(networkExternalID, readableName)) - acl := createReferencingACL(pgName, nil) - testData := []pgSync{ - { - before: createInitialPG(pgName, readableName, networkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupNetworkPolicyDbIDs(namespaceName, policyName, networkExternalID), - }, - } - initialDb := []libovsdbtest.TestData{acl} - testSyncerWithData(testData, initialDb, nil) - }) - } - for _, pgCount := range []int{0, 1, 2, 3, 4} { - pgCount := pgCount - // verify different batch sizes - ginkgo.It(fmt.Sprintf("test batching, pg number %d", pgCount), func() { - initialDb := []libovsdbtest.TestData{} - testData := []pgSync{} - for i := 0; i < pgCount; i++ { - namespaceName := fmt.Sprintf("namespace-%d", i) - pgName := hashedPG(getNetworkScopedName(defaultNetworkExternalID, namespaceName)) + "_" + egressDefaultDenySuffix - // default deny port group's namespace is extracted from the referencing acl - acl := createReferencingACL(pgName, map[string]string{ - libovsdbops.ObjectNameKey.String(): namespaceName, - }) - testData = append(testData, pgSync{ - before: createInitialPG(pgName, pgName, defaultNetworkExternalID, - []string{fakePortUUID}, []string{acl.UUID}), - after: getPortGroupNetpolNamespaceDbIDs(namespaceName, "Egress", defaultNetworkExternalID), - }) - initialDb = append(initialDb, acl) - } - testSyncerWithData(testData, initialDb, nil) - }) - } -}) diff --git a/go-controller/pkg/ovn/gateway_test.go b/go-controller/pkg/ovn/gateway_test.go index c52e3371e7..da48869991 100644 --- a/go-controller/pkg/ovn/gateway_test.go +++ b/go-controller/pkg/ovn/gateway_test.go @@ -14,7 +14,9 @@ import ( utilnet "k8s.io/utils/net" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -31,6 +33,33 @@ func init() { format.MaxLength = 0 } +func generateAdvertisedUDNIsolationExpectedNB(testData []libovsdbtest.TestData, networkName string, networkID int, clusterIPSubnets []*net.IPNet, nodeSwitch *nbdb.LogicalSwitch, addrSet addressset.AddressSet) []libovsdbtest.TestData { + var passMatches []string + for _, subnet := range clusterIPSubnets { + ipPrefix := "ip4" + if utilnet.IsIPv6CIDR(subnet) { + ipPrefix = "ip6" + } + passMatches = append(passMatches, fmt.Sprintf("(%s.src == %s && %s.dst == %s)", ipPrefix, subnet, ipPrefix, subnet)) + + } + passACL := libovsdbutil.BuildACL( + GetAdvertisedNetworkSubnetsPassACLdbIDs(DefaultNetworkControllerName, networkName, networkID), + types.AdvertisedNetworkPassPriority, + strings.Join(passMatches, " || "), + nbdb.ACLActionPass, + nil, + libovsdbutil.LportEgressAfterLB) + passACL.Tier = types.PrimaryACLTier + passACL.UUID = "advertised-udn-isolation-pass-acl-UUID" + dropACL := BuildAdvertisedNetworkSubnetsDropACL(addrSet) + dropACL.UUID = "advertised-udn-isolation-drop-acl-UUID" + nodeSwitch.ACLs = append(nodeSwitch.ACLs, passACL.UUID, dropACL.UUID) + testData = append(testData, passACL, dropACL) + + return testData +} + func generateGatewayInitExpectedNB(testData []libovsdbtest.TestData, expectedOVNClusterRouter *nbdb.LogicalRouter, expectedNodeSwitch *nbdb.LogicalSwitch, 
nodeName string, clusterIPSubnets []*net.IPNet, hostSubnets []*net.IPNet, l3GatewayConfig *util.L3GatewayConfig, joinLRPIPs, defLRPIPs []*net.IPNet, skipSnat bool, nodeMgmtPortIP, diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index 22e794e1bd..8d4b57dda7 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -335,6 +335,9 @@ func addNodeLogicalFlowsHelper(testData []libovsdbtest.TestData, expectedOVNClus MAC: node.NodeLRPMAC, Networks: []string{node.NodeGWIP}, GatewayChassis: []string{chassisName + "-UUID"}, + Options: map[string]string{ + "gateway_mtu": "1400", + }, }) if serviceControllerEnabled { testData = append(testData, &nbdb.ChassisTemplateVar{ @@ -1270,6 +1273,11 @@ var _ = ginkgo.Describe("Default network controller operations", func() { []*net.IPNet{classBIPAddress(node1.LrpIP)}, []*net.IPNet{classBIPAddress(node1.DrLrpIP)}, skipSnat, node1.NodeMgmtPortIP, "1400") + if oc.isPodNetworkAdvertisedAtNode(node1.Name) { + addrSet, err := oc.addressSetFactory.GetAddressSet(GetAdvertisedNetworkSubnetsAddressSetDBIDs()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + expectedNBDatabaseState = generateAdvertisedUDNIsolationExpectedNB(expectedNBDatabaseState, oc.GetNetworkName(), oc.GetNetworkID(), clusterSubnets, expectedNodeSwitch, addrSet) + } GR = nil for _, testObj := range expectedNBDatabaseState { if router, ok := testObj.(*nbdb.LogicalRouter); ok && router.UUID == types.GWRouterPrefix+node1.Name+"-UUID" { diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index d0babe92ba..7a1aad8ed7 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -402,7 +402,11 @@ func (oc *DefaultNetworkController) syncNodeGateway(node *corev1.Node, hostSubne return fmt.Errorf("error creating gateway for node %s: %v", node.Name, err) } } - return nil + + if util.IsPodNetworkAdvertisedAtNode(oc, node.Name) { + return oc.addAdvertisedNetworkIsolation(node.Name) + } + return oc.deleteAdvertisedNetworkIsolation(node.Name) } // gatewayChanged() compares old annotations to new and returns true if something has changed. 
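Both the expected-NB test helper and the syncNodeGateway change above revolve around the same per-node rule pair: a pass ACL at priority 1100 (types.AdvertisedNetworkPassPriority) for traffic that stays inside one of the network's own cluster subnets, and a lower-priority drop ACL at 1050 for traffic between advertised networks. A minimal sketch of how such a pass match is assembled, with illustrative dual-stack subnets:

package main

import (
	"fmt"
	"strings"

	utilnet "k8s.io/utils/net"
)

// buildPassMatch mirrors the way the pass ACL match is composed: one clause per
// cluster subnet, so traffic whose source and destination stay within the same
// subnet bypasses the advertised-network drop ACL.
func buildPassMatch(subnets []string) string {
	var clauses []string
	for _, subnet := range subnets {
		ipPrefix := "ip4"
		if utilnet.IsIPv6CIDRString(subnet) {
			ipPrefix = "ip6"
		}
		clauses = append(clauses, fmt.Sprintf("(%s.src == %s && %s.dst == %s)", ipPrefix, subnet, ipPrefix, subnet))
	}
	return strings.Join(clauses, " || ")
}

func main() {
	// Hypothetical dual-stack cluster subnets of an advertised network.
	fmt.Println(buildPassMatch([]string{"10.128.0.0/14", "fd00:10:128::/60"}))
}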
@@ -427,6 +431,10 @@ func nodeSubnetChanged(oldNode, node *corev1.Node, netName string) bool { func joinCIDRChanged(oldNode, node *corev1.Node, netName string) bool { var oldCIDRs, newCIDRs map[string]json.RawMessage + if oldNode.Annotations[util.OVNNodeGRLRPAddrs] == node.Annotations[util.OVNNodeGRLRPAddrs] { + return false + } + if err := json.Unmarshal([]byte(oldNode.Annotations[util.OVNNodeGRLRPAddrs]), &oldCIDRs); err != nil { klog.Errorf("Failed to unmarshal old node %s annotation: %v", oldNode.Name, err) return false diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index b7d11000d5..c11ca2a2ae 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -594,8 +594,14 @@ func (oc *SecondaryLayer2NetworkController) addUpdateLocalNodeEvent(node *corev1 } else { if !util.IsPodNetworkAdvertisedAtNode(oc, node.Name) { err = oc.addUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName) + if err == nil && util.IsRouteAdvertisementsEnabled() { + err = oc.deleteAdvertisedNetworkIsolation(node.Name) + } } else { err = oc.deleteUDNClusterSubnetEgressSNAT(gwConfig.hostSubnets, gwManager.gwRouterName) + if err == nil { + err = oc.addAdvertisedNetworkIsolation(node.Name) + } } if err != nil { errs = append(errs, err) diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index e2a4b84e35..65ca015ab7 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -610,6 +610,20 @@ func (oc *SecondaryLayer3NetworkController) run() error { } } + // start NetworkQoS controller if feature is enabled + if config.OVNKubernetesFeature.EnableNetworkQoS { + err := oc.newNetworkQoSController() + if err != nil { + return fmt.Errorf("unable to create network qos controller, err: %w", err) + } + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + // Until we have scale issues in future let's spawn only one thread + oc.nqosController.Run(1, oc.stopChan) + }() + } + klog.Infof("Completing all the Watchers for network %s took %v", oc.GetNetworkName(), time.Since(start)) return nil @@ -916,10 +930,18 @@ func (oc *SecondaryLayer3NetworkController) addNode(node *corev1.Node) ([]*net.I if err := oc.addUDNNodeSubnetEgressSNAT(hostSubnets, node); err != nil { return nil, err } + if util.IsRouteAdvertisementsEnabled() { + if err := oc.deleteAdvertisedNetworkIsolation(node.Name); err != nil { + return nil, err + } + } } else { if err := oc.deleteUDNNodeSubnetEgressSNAT(hostSubnets, node); err != nil { return nil, err } + if err := oc.addAdvertisedNetworkIsolation(node.Name); err != nil { + return nil, err + } } } return hostSubnets, nil diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go index 1d44fdef42..077a6fd822 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go @@ -830,7 +830,14 @@ func expectedLayer3EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC ExternalIDs: standardNonDefaultNetworkExtIDs(netInfo), Nat: []string{masqSNATUUID1}, }, - &nbdb.LogicalRouterPort{UUID: rtosLRPUUID, Name: rtosLRPName, Networks: []string{"192.168.1.1/24"}, MAC: "0a:58:c0:a8:01:01", 
GatewayChassis: []string{gatewayChassisUUID}}, + &nbdb.LogicalRouterPort{ + UUID: rtosLRPUUID, + Name: rtosLRPName, + Networks: []string{"192.168.1.1/24"}, + MAC: "0a:58:c0:a8:01:01", + GatewayChassis: []string{gatewayChassisUUID}, + Options: map[string]string{"gateway_mtu": "1400"}, + }, expectedGRStaticRoute(staticRouteUUID1, nodeSubnet.String(), lrsrNextHop, &nbdb.LogicalRouterStaticRoutePolicySrcIP, nil, netInfo), expectedGRStaticRoute(staticRouteUUID2, gwRouterJoinIPAddress().IP.String(), gwRouterJoinIPAddress().IP.String(), nil, nil, netInfo), expectedLogicalRouterPolicy(routerPolicyUUID1, netInfo, nodeName, nodeIP, managementPortIP(nodeSubnet).String()), diff --git a/go-controller/pkg/ovn/udn_isolation.go b/go-controller/pkg/ovn/udn_isolation.go index 56d8d51490..6c44489f9c 100644 --- a/go-controller/pkg/ovn/udn_isolation.go +++ b/go-controller/pkg/ovn/udn_isolation.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "k8s.io/klog/v2" @@ -15,6 +16,7 @@ import ( libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" @@ -237,3 +239,165 @@ func (oc *DefaultNetworkController) getUDNOpenPortDbIDs(podNamespacedName string libovsdbops.PolicyDirectionKey: string(aclDir), }) } + +// advertisedNetworkSubnetsKey is the object name key for the global advertised networks address set and the global deny ACL +const advertisedNetworkSubnetsKey = "advertised-network-subnets" + +// GetAdvertisedNetworkSubnetsAddressSetDBIDs returns the DB IDs for the advertised network subnets address set +func GetAdvertisedNetworkSubnetsAddressSetDBIDs() *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetAdvertisedNetwork, DefaultNetworkControllerName, map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: advertisedNetworkSubnetsKey, + }) +} + +// GetAdvertisedNetworkSubnetsDropACLdbIDs returns the DB IDs for the advertised network subnets drop ACL +func GetAdvertisedNetworkSubnetsDropACLdbIDs() *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.ACLAdvertisedNetwork, DefaultNetworkControllerName, + map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: advertisedNetworkSubnetsKey, + libovsdbops.NetworkKey: "", + }) +} + +// GetAdvertisedNetworkSubnetsPassACLdbIDs returns the DB IDs for the advertised network subnets pass ACL +func GetAdvertisedNetworkSubnetsPassACLdbIDs(controller, networkName string, networkID int) *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.ACLAdvertisedNetwork, controller, + map[libovsdbops.ExternalIDKey]string{ + libovsdbops.ObjectNameKey: networkName, + libovsdbops.NetworkKey: strconv.Itoa(networkID), + }) +} + +// BuildAdvertisedNetworkSubnetsDropACL builds the advertised network subnets drop ACL: +// action match priority +// ------ --------------------------------------------------------------------------- -------- +// drop "(ip[4|6].src == $<addr-set> && ip[4|6].dst == $<addr-set>)" 1050 +func BuildAdvertisedNetworkSubnetsDropACL(advertisedNetworkSubnetsAddressSet addressset.AddressSet) *nbdb.ACL { + var dropMatches []string + v4AddrSet, v6AddrSet
:= advertisedNetworkSubnetsAddressSet.GetASHashNames() + if v4AddrSet != "" { + dropMatches = append(dropMatches, fmt.Sprintf("(ip4.src == $%s && ip4.dst == $%s)", v4AddrSet, v4AddrSet)) + } + if v6AddrSet != "" { + dropMatches = append(dropMatches, fmt.Sprintf("(ip6.src == $%s && ip6.dst == $%s)", v6AddrSet, v6AddrSet)) + } + + dropACL := libovsdbutil.BuildACL( + GetAdvertisedNetworkSubnetsDropACLdbIDs(), + types.AdvertisedNetworkDenyPriority, + strings.Join(dropMatches, " || "), + nbdb.ACLActionDrop, + nil, + libovsdbutil.LportEgressAfterLB) + dropACL.Tier = types.PrimaryACLTier + return dropACL +} + +// addAdvertisedNetworkIsolation adds advertised network isolation rules to the given node. +// It adds the following ACLs to the node switch: +// action match priority +// ------ --------------------------------------------------------------------------- -------- +// pass "(ip[4|6].src == <subnet> && ip[4|6].dst == <subnet>)" 1100 +// drop "(ip[4|6].src == $<addr-set> && ip[4|6].dst == $<addr-set>)" 1050 +func (bnc *BaseNetworkController) addAdvertisedNetworkIsolation(nodeName string) error { + var passMatches, cidrs []string + var ops []ovsdb.Operation + + addrSet, err := bnc.addressSetFactory.GetAddressSet(GetAdvertisedNetworkSubnetsAddressSetDBIDs()) + if err != nil { + return fmt.Errorf("failed to get advertised subnets address set %s for network %s: %w", GetAdvertisedNetworkSubnetsAddressSetDBIDs(), bnc.GetNetworkName(), err) + } + + for _, subnet := range bnc.Subnets() { + ipPrefix := "ip4" + if utilnet.IsIPv6CIDR(subnet.CIDR) { + ipPrefix = "ip6" + } + passMatches = append(passMatches, fmt.Sprintf("(%s.src == %s && %s.dst == %s)", ipPrefix, subnet.CIDR, ipPrefix, subnet.CIDR)) + cidrs = append(cidrs, subnet.CIDR.String()) + + } + + addrOps, err := addrSet.AddAddressesReturnOps(cidrs) + if err != nil { + return fmt.Errorf("failed to add addresses %q to the %s address set for network %s: %w", cidrs, GetAdvertisedNetworkSubnetsAddressSetDBIDs(), bnc.GetNetworkName(), err) + } + ops = append(ops, addrOps...)
+ + if len(passMatches) > 0 { + passACL := libovsdbutil.BuildACL( + GetAdvertisedNetworkSubnetsPassACLdbIDs(bnc.controllerName, bnc.GetNetworkName(), bnc.GetNetworkID()), + types.AdvertisedNetworkPassPriority, + strings.Join(passMatches, " || "), + nbdb.ACLActionPass, + nil, + libovsdbutil.LportEgressAfterLB) + passACL.Tier = types.PrimaryACLTier + + ops, err = libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, ops, nil, passACL) + if err != nil { + return fmt.Errorf("failed to create or update network isolation pass ACL %s for network %s: %w", GetAdvertisedNetworkSubnetsPassACLdbIDs(bnc.controllerName, bnc.GetNetworkName(), bnc.GetNetworkID()), bnc.GetNetworkName(), err) + } + ops, err = libovsdbops.AddACLsToLogicalSwitchOps(bnc.nbClient, ops, bnc.GetNetworkScopedSwitchName(nodeName), passACL) + if err != nil { + return fmt.Errorf("failed to add network isolation pass ACL to switch %s for network %s: %w", bnc.GetNetworkScopedSwitchName(nodeName), bnc.GetNetworkName(), err) + } + } + + dropACL := BuildAdvertisedNetworkSubnetsDropACL(addrSet) + ops, err = libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, ops, nil, dropACL) + if err != nil { + return fmt.Errorf("failed to create or update network isolation drop ACL: %v", err) + } + ops, err = libovsdbops.AddACLsToLogicalSwitchOps(bnc.nbClient, ops, bnc.GetNetworkScopedSwitchName(nodeName), dropACL) + if err != nil { + return fmt.Errorf("failed to add network isolation drop ACL to switch %s for network %s: %w", bnc.GetNetworkScopedSwitchName(nodeName), bnc.GetNetworkName(), err) + } + + if _, err = libovsdbops.TransactAndCheck(bnc.nbClient, ops); err != nil { + return fmt.Errorf("failed to configure network isolation OVN rules for network %s: %w", bnc.GetNetworkName(), err) + } + return nil +}
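The function above and its delete counterpart that follows share one global address set ("advertised-network-subnets") holding the CIDRs of every advertised network, plus a single drop ACL keyed on that set's per-family hash names; because the per-network pass ACL sits at the higher 1100 priority, only traffic crossing between different advertised networks is actually dropped. A sketch of the drop match, with made-up address set hash names:

package main

import (
	"fmt"
	"strings"
)

// buildDropMatch mirrors BuildAdvertisedNetworkSubnetsDropACL: drop traffic whose
// source and destination both fall into the global advertised-subnets address set.
func buildDropMatch(v4HashName, v6HashName string) string {
	var clauses []string
	if v4HashName != "" {
		clauses = append(clauses, fmt.Sprintf("(ip4.src == $%s && ip4.dst == $%s)", v4HashName, v4HashName))
	}
	if v6HashName != "" {
		clauses = append(clauses, fmt.Sprintf("(ip6.src == $%s && ip6.dst == $%s)", v6HashName, v6HashName))
	}
	return strings.Join(clauses, " || ")
}

func main() {
	// Hash names are illustrative; the real ones come from the address set
	// backing GetAdvertisedNetworkSubnetsAddressSetDBIDs().
	fmt.Println(buildDropMatch("a1234567890123456789", "a9876543210987654321"))
}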
+ +// deleteAdvertisedNetworkIsolation deletes advertised network isolation rules from the given node switch. +// It removes the network CIDRs from the global advertised networks address set together with the ACLs on the node switch. +func (bnc *BaseNetworkController) deleteAdvertisedNetworkIsolation(nodeName string) error { + addrSet, err := bnc.addressSetFactory.GetAddressSet(GetAdvertisedNetworkSubnetsAddressSetDBIDs()) + if err != nil { + return fmt.Errorf("failed to get advertised subnets address set %s for network %s: %w", GetAdvertisedNetworkSubnetsAddressSetDBIDs(), bnc.GetNetworkName(), err) + } + + var cidrs []string + for _, subnet := range bnc.Subnets() { + cidrs = append(cidrs, subnet.CIDR.String()) + } + ops, err := addrSet.DeleteAddressesReturnOps(cidrs) + if err != nil { + return fmt.Errorf("failed to create ovsdb ops for deleting the addresses from %s address set for network %s: %w", GetAdvertisedNetworkSubnetsAddressSetDBIDs(), bnc.GetNetworkName(), err) + } + + passACLIDs := GetAdvertisedNetworkSubnetsPassACLdbIDs(bnc.controllerName, bnc.GetNetworkName(), bnc.GetNetworkID()) + passACLPredicate := libovsdbops.GetPredicate[*nbdb.ACL](passACLIDs, nil) + passACLs, err := libovsdbops.FindACLsWithPredicate(bnc.nbClient, passACLPredicate) + if err != nil { + return fmt.Errorf("unable to find the pass ACL for advertised network %s: %w", bnc.GetNetworkName(), err) + } + + dropACLIDs := GetAdvertisedNetworkSubnetsDropACLdbIDs() + dropACLPredicate := libovsdbops.GetPredicate[*nbdb.ACL](dropACLIDs, nil) + dropACLs, err := libovsdbops.FindACLsWithPredicate(bnc.nbClient, dropACLPredicate) + if err != nil { + return fmt.Errorf("unable to find the drop ACL for advertised network %s: %w", bnc.GetNetworkName(), err) + } + + // ACLs referenced by the switch will be deleted by the DB if there are no other references + p := func(sw *nbdb.LogicalSwitch) bool { return sw.Name == bnc.GetNetworkScopedSwitchName(nodeName) } + ops, err = libovsdbops.RemoveACLsFromLogicalSwitchesWithPredicateOps(bnc.nbClient, ops, p, append(passACLs, dropACLs...)...) + if err != nil { + return fmt.Errorf("failed to create ovsdb ops for removing network isolation ACLs from the %s switch for network %s: %w", bnc.GetNetworkScopedSwitchName(nodeName), bnc.GetNetworkName(), err) + } + + _, err = libovsdbops.TransactAndCheck(bnc.nbClient, ops) + return err +} diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go index 800aa8bcf1..172cac5e33 100644 --- a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go @@ -134,9 +134,29 @@ func (zch *ZoneChassisHandler) createOrUpdateNodeChassis(node *corev1.Node, isRe node.Name, parsedErr) } - nodePrimaryIp, err := util.GetNodePrimaryIP(node) + // Get the encap IPs.
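Rather than a single Encap row derived from the node's primary IP, the chassis handler now builds one geneve Encap per address in the k8s.ovn.org/node-encap-ips annotation (parsed just below), all sharing the csum option and, when a non-default encap port is configured, a dst_port option. A self-contained sketch of that translation; the struct is a simplified stand-in for sbdb.Encap and the values are hypothetical:

package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
)

// encap is a trimmed-down stand-in for sbdb.Encap, for illustration only.
type encap struct {
	ChassisName string
	IP          string
	Type        string
	Options     map[string]string
}

// buildEncaps turns the JSON annotation value into one geneve encap per IP.
func buildEncaps(chassisID, annotation string, encapPort, defaultEncapPort uint) ([]encap, error) {
	var ips []string
	if err := json.Unmarshal([]byte(annotation), &ips); err != nil {
		return nil, fmt.Errorf("failed to unmarshal node-encap-ips: %w", err)
	}

	options := map[string]string{"csum": "true"}
	// Only set the geneve destination port when it differs from the default.
	if encapPort != defaultEncapPort {
		options["dst_port"] = strconv.FormatUint(uint64(encapPort), 10)
	}

	encaps := make([]encap, 0, len(ips))
	for _, ip := range ips {
		encaps = append(encaps, encap{
			ChassisName: chassisID,
			IP:          strings.TrimSpace(ip),
			Type:        "geneve",
			Options:     options,
		})
	}
	return encaps, nil
}

func main() {
	// Annotation value as it appears on the Node object, e.g. testNode4 further below.
	encaps, err := buildEncaps("cb9ec8fa-b409-4ef3-9f42-d9283c47aac9", `["10.0.0.14", "10.0.0.15"]`, 6081, 6081)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", encaps)
}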
+ encapIPs, err := util.ParseNodeEncapIPsAnnotation(node) if err != nil { - return fmt.Errorf("failed to parse node %s primary IP %w", node.Name, err) + return fmt.Errorf("failed to parse node-encap-ips for node - %s, error: %w", + node.Name, err) + } + + encaps := make([]*sbdb.Encap, 0, len(encapIPs)) + encapOptions := map[string]string{} + encapOptions["csum"] = "true" + // set the geneve port if using something else than default + if config.Default.EncapPort != config.DefaultEncapPort { + encapOptions["dst_port"] = strconv.FormatUint(uint64(config.Default.EncapPort), 10) + } + + for _, ovnEncapIP := range encapIPs { + encap := sbdb.Encap{ + ChassisName: chassisID, + IP: strings.TrimSpace(ovnEncapIP), + Type: "geneve", + Options: encapOptions, + } + encaps = append(encaps, &encap) } chassis := sbdb.Chassis{ @@ -147,17 +167,5 @@ func (zch *ZoneChassisHandler) createOrUpdateNodeChassis(node *corev1.Node, isRe }, } - encap := sbdb.Encap{ - ChassisName: chassisID, - IP: nodePrimaryIp, - Type: "geneve", - Options: map[string]string{"csum": "true"}, - } - - // set the geneve port if using something else than default - if config.Default.EncapPort != config.DefaultEncapPort { - encap.Options["dst_port"] = strconv.FormatUint(uint64(config.Default.EncapPort), 10) - } - - return libovsdbops.CreateOrUpdateChassis(zch.sbClient, &chassis, &encap) + return libovsdbops.CreateOrUpdateChassis(zch.sbClient, &chassis, encaps...) } diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go index a9d3dcd92c..05b9fb6b9c 100644 --- a/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler_test.go @@ -16,6 +16,7 @@ import ( libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { @@ -25,9 +26,14 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { testNode1 corev1.Node testNode2 corev1.Node testNode3 corev1.Node + testNode4 corev1.Node + testNode5 corev1.Node node1Chassis sbdb.Chassis node2Chassis sbdb.Chassis node3Chassis sbdb.Chassis + node4Chassis sbdb.Chassis + node5Chassis sbdb.Chassis + node5Encap sbdb.Encap initialSBDB []libovsdbtest.TestData ) @@ -50,11 +56,17 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { node1Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", Hostname: "node1", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6"} node2Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", Hostname: "node2", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7"} node3Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", Hostname: "node3", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8"} + node4Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9", Hostname: "node4", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9"} + node5Chassis = sbdb.Chassis{Name: "cb9ec8fa-b409-4ef3-9f42-d9283c47aaca", Hostname: "node5", UUID: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9a", + Encaps: []string{"cb9ec8fa-b409-4ef3-9f42-d9283c47aacb"}} + node5Encap = sbdb.Encap{ChassisName: "cb9ec8fa-b409-4ef3-9f42-d9283c47aaca", IP: "10.0.0.16", Type: "geneve", + UUID: 
"cb9ec8fa-b409-4ef3-9f42-d9283c47aacb"} testNode1 = corev1.Node{ ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6"}, + Name: "node1", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + "k8s.ovn.org/node-encap-ips": "[\"10.0.0.10\"]"}, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.10"}}, @@ -62,8 +74,9 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { } testNode2 = corev1.Node{ ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7"}, + Name: "node2", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + "k8s.ovn.org/node-encap-ips": "[\"10.0.0.11\"]"}, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.11"}}, @@ -71,16 +84,31 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { } testNode3 = corev1.Node{ ObjectMeta: metav1.ObjectMeta{ - Name: "node3", - Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8"}, + Name: "node3", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + "k8s.ovn.org/node-encap-ips": "[\"10.0.0.12\"]"}, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{{Type: corev1.NodeInternalIP, Address: "10.0.0.12"}}, }, } + testNode4 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node4", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aac9", + "k8s.ovn.org/node-encap-ips": "[\"10.0.0.14\", \"10.0.0.15\"]"}, + }, + } + testNode5 = corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node5", + Annotations: map[string]string{"k8s.ovn.org/node-chassis-id": "cb9ec8fa-b409-4ef3-9f42-d9283c47aaca", + "k8s.ovn.org/node-encap-ips": "[\"10.0.0.11\"]"}, + }, + } initialSBDB = []libovsdbtest.TestData{ - &node1Chassis, &node2Chassis} + &node1Chassis, &node2Chassis, &node5Chassis, &node5Encap} }) ginkgo.AfterEach(func() { @@ -155,9 +183,12 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { err = zoneChassisHandler.AddRemoteZoneNode(&testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + encapIP, err := util.ParseNodeEncapIPsAnnotation(&testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + encap := &sbdb.Encap{ Type: "geneve", - IP: testNode3.Status.Addresses[0].Address, + IP: encapIP[0], } err = libovsdbOvnSBClient.Get(context.Background(), encap) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -175,6 +206,108 @@ var _ = ginkgo.Describe("Zone Interconnect Chassis Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("Add multiple encap records", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = 
zoneChassisHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + encapIP, err := util.ParseNodeEncapIPsAnnotation(&testNode4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + encap1 := &sbdb.Encap{ + Type: "geneve", + IP: encapIP[0], + } + + encap2 := &sbdb.Encap{ + Type: "geneve", + IP: encapIP[1], + } + + err = libovsdbOvnSBClient.Get(context.Background(), encap1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = libovsdbOvnSBClient.Get(context.Background(), encap2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node4Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.Encaps).To(gomega.HaveLen(2)) + gomega.Expect(nodeCh.Encaps).To(gomega.ContainElements(string(encap1.UUID)), string(encap2.UUID)) + + return nil + } + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Update encap record when chassis exists", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnSBClient libovsdbclient.Client + _, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneChassisHandler := NewZoneChassisHandler(libovsdbOvnSBClient) + err = zoneChassisHandler.AddRemoteZoneNode(&testNode5) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + encapIP, err := util.ParseNodeEncapIPsAnnotation(&testNode5) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + encap := &sbdb.Encap{ + Type: "geneve", + IP: encapIP[0], + } + + err = libovsdbOvnSBClient.Get(context.Background(), encap) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + nodeCh, err := libovsdbops.GetChassis(libovsdbOvnSBClient, &node5Chassis) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nodeCh.Encaps).To(gomega.HaveLen(1)) + gomega.Expect(nodeCh.Encaps).To(gomega.ContainElements(string(encap.UUID))) + + err = libovsdbOvnSBClient.Get(context.Background(), &node5Encap) + gomega.Expect(err).To(gomega.SatisfyAny(gomega.BeNil(), gomega.MatchError(libovsdbclient.ErrNotFound))) + + return nil + } + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("Move chassis zone", func() { app.Action = func(ctx *cli.Context) error { dbSetup := libovsdbtest.TestSetup{ diff --git a/go-controller/pkg/ovnwebhook/nodeadmission.go b/go-controller/pkg/ovnwebhook/nodeadmission.go index dbdc9b767e..b21a51bc87 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission.go @@ -52,6 +52,7 @@ var commonNodeAnnotationChecks = map[string]checkNodeAnnot{ return fmt.Errorf("%s can only be set to %s or %s, it cannot be removed", util.OvnNodeZoneName, types.OvnDefaultZone, nodeName) }, + util.OVNNodeEncapIPs: nil, } // interconnectNodeAnnotationChecks holds annotations allowed for ovnkube-node: users in IC environments diff --git a/go-controller/pkg/retry/obj_retry.go 
b/go-controller/pkg/retry/obj_retry.go index 5f9dfffb16..46484ae4fe 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -320,7 +320,11 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { } if r.ResourceHandler.NeedsUpdateDuringRetry && entry.config != nil && entry.newObj != nil { klog.Infof("%v retry: updating object %s", r.ResourceHandler.ObjType, objKey) - if err := r.ResourceHandler.UpdateResource(entry.config, entry.newObj, true); err != nil { + if !r.ResourceHandler.IsResourceScheduled(entry.newObj) { + // unscheduled resources (pods) will be retried once they are scheduled; do not count this as a failure and do not retry now. + // objects that are not scheduled should never be queued to the retry handler in the first place, so log it as an error. + klog.Errorf("%v retry: cannot update object that is not scheduled: %s", r.ResourceHandler.ObjType, objKey) + } else if err := r.ResourceHandler.UpdateResource(entry.config, entry.newObj, true); err != nil { entry.timeStamp = time.Now() entry.failedAttempts++ if entry.failedAttempts >= MaxFailedAttempts { @@ -336,14 +340,12 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { } else { // delete old object if needed if entry.oldObj != nil { - klog.Infof("Removing old object: %s %s (failed: %v)", - r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + klog.Infof("Removing old object: %s %s (failed: %v)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) if !r.ResourceHandler.IsResourceScheduled(entry.oldObj) { - klog.V(5).Infof("Retry: %s %s not scheduled", r.ResourceHandler.ObjType, objKey) - entry.failedAttempts++ - return - } - if err := r.ResourceHandler.DeleteResource(entry.oldObj, entry.config); err != nil { + // unscheduled resources (pods) will be retried once they are scheduled; do not count this as a failure and do not retry now. + // objects that are not scheduled should never be queued to the retry handler in the first place, so log it as an error. + klog.Errorf("%v retry: cannot delete object that was not scheduled: %s", r.ResourceHandler.ObjType, objKey) + } else if err := r.ResourceHandler.DeleteResource(entry.oldObj, entry.config); err != nil { entry.timeStamp = time.Now() entry.failedAttempts++ if entry.failedAttempts >= MaxFailedAttempts {
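The guards above hinge on the handler's IsResourceScheduled hook: an object the scheduler has not placed yet is neither processed nor counted as a failed attempt, since a fresh event will arrive once it is scheduled. For pod-backed handlers that check typically reduces to whether the pod has a node assigned; a minimal illustration of the assumed shape (not code from this patch):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// isPodScheduled reports whether the scheduler has bound the pod to a node.
// Non-pod objects are treated as always scheduled.
func isPodScheduled(obj interface{}) bool {
	pod, ok := obj.(*corev1.Pod)
	if !ok {
		return true
	}
	return pod.Spec.NodeName != ""
}

func main() {
	fmt.Println(isPodScheduled(&corev1.Pod{}))                                     // false
	fmt.Println(isPodScheduled(&corev1.Pod{Spec: corev1.PodSpec{NodeName: "n1"}})) // true
}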
+ klog.Errorf("%v retry: cannot create object that is not scheduled %s", r.ResourceHandler.ObjType, objKey) + } else if err := r.ResourceHandler.AddResource(entry.newObj, true); err != nil { entry.timeStamp = time.Now() entry.failedAttempts++ if entry.failedAttempts >= MaxFailedAttempts { diff --git a/go-controller/pkg/testing/networkmanager/fake.go b/go-controller/pkg/testing/networkmanager/fake.go index d3d86d8af5..5857d5b98c 100644 --- a/go-controller/pkg/testing/networkmanager/fake.go +++ b/go-controller/pkg/testing/networkmanager/fake.go @@ -46,6 +46,7 @@ func (fcm *FakeControllerManager) Reconcile(_ string, _, _ util.NetInfo) error { type FakeNetworkManager struct { // namespace -> netInfo + // if netInfo is nil, it represents a namespace which contains the required UDN label but with no valid network. It will return invalid network error. PrimaryNetworks map[string]util.NetInfo } @@ -54,11 +55,15 @@ func (fnm *FakeNetworkManager) Start() error { return nil } func (fnm *FakeNetworkManager) Stop() {} func (fnm *FakeNetworkManager) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { - return fnm.GetActiveNetworkForNamespaceFast(namespace), nil + network := fnm.GetActiveNetworkForNamespaceFast(namespace) + if network == nil { + return nil, util.NewInvalidPrimaryNetworkError(namespace) + } + return network, nil } func (fnm *FakeNetworkManager) GetActiveNetworkForNamespaceFast(namespace string) util.NetInfo { - if primaryNetworks, ok := fnm.PrimaryNetworks[namespace]; ok && primaryNetworks != nil { + if primaryNetworks, ok := fnm.PrimaryNetworks[namespace]; ok { return primaryNetworks } return &util.DefaultNetInfo{} diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 4e0f972749..b06e337efd 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -86,6 +86,10 @@ const ( DefaultAllowPriority = 1001 // Default deny acl rule priority DefaultDenyPriority = 1000 + // Pass priority for isolated advertised networks + AdvertisedNetworkPassPriority = 1100 + // Deny priority for isolated advertised networks + AdvertisedNetworkDenyPriority = 1050 // ACL PlaceHolderACL Tier Priorities PrimaryUDNAllowPriority = 1001 @@ -305,4 +309,12 @@ const ( // CUDNPrefix of all CUDN network names CUDNPrefix = "cluster_udn_" + + // NFTNoPMTUDRemoteNodeIPsv4 is a set used to track remote node IPs that do not belong to + // the local node's subnet. + NFTNoPMTUDRemoteNodeIPsv4 = "no-pmtud-remote-node-ips-v4" + + // NFTNoPMTUDRemoteNodeIPsv6 is a set used to track remote node IPs that do not belong to + // the local node's subnet. 
+ NFTNoPMTUDRemoteNodeIPsv6 = "no-pmtud-remote-node-ips-v6" ) diff --git a/go-controller/pkg/types/resource_status.go b/go-controller/pkg/types/resource_status.go index 2a69fd57c1..c7a2e51155 100644 --- a/go-controller/pkg/types/resource_status.go +++ b/go-controller/pkg/types/resource_status.go @@ -10,6 +10,7 @@ const ( APBRouteErrorMsg = "failed to apply policy" EgressFirewallErrorMsg = "EgressFirewall Rules not correctly applied" EgressQoSErrorMsg = "EgressQoS Rules not correctly applied" + NetworkQoSErrorMsg = "NetworkQoS Destinations not correctly applied" ) func GetZoneStatus(zoneID, message string) string { diff --git a/go-controller/pkg/util/fake_client.go b/go-controller/pkg/util/fake_client.go index 0286785d3f..51b624cac7 100644 --- a/go-controller/pkg/util/fake_client.go +++ b/go-controller/pkg/util/fake_client.go @@ -31,6 +31,8 @@ import ( egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + networkqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1" + networkqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned/fake" routeadvertisements "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1" routeadvertisementsfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/fake" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" @@ -45,6 +47,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { egressServiceObjects := []runtime.Object{} apbExternalRouteObjects := []runtime.Object{} anpObjects := []runtime.Object{} + networkQoSObjects := []runtime.Object{} v1Objects := []runtime.Object{} nads := []runtime.Object{} cloudObjects := []runtime.Object{} @@ -80,6 +83,8 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { raObjects = append(raObjects, object) case *frrapi.FRRConfiguration: frrObjects = append(frrObjects, object) + case *networkqos.NetworkQoS: + networkQoSObjects = append(networkQoSObjects, object) default: v1Objects = append(v1Objects, object) } @@ -107,6 +112,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { UserDefinedNetworkClient: udnfake.NewSimpleClientset(udnObjects...), RouteAdvertisementsClient: routeadvertisementsfake.NewSimpleClientset(raObjects...), FRRClient: frrfake.NewSimpleClientset(frrObjects...), + NetworkQoSClient: networkqosfake.NewSimpleClientset(networkQoSObjects...), } } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index f54a8019ab..7fb84610ea 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -50,6 +50,7 @@ import ( egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" egressserviceclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned" + networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" routeadvertisementsclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned" userdefinednetworkclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" ) @@ -71,6 +72,7 @@ type OVNClientset struct { UserDefinedNetworkClient userdefinednetworkclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface + NetworkQoSClient networkqosclientset.Interface } // OVNMasterClientset @@ -90,6 +92,7 @@ type OVNMasterClientset struct { UserDefinedNetworkClient userdefinednetworkclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface + NetworkQoSClient networkqosclientset.Interface } // OVNKubeControllerClientset @@ -107,6 +110,7 @@ type OVNKubeControllerClientset struct { NetworkAttchDefClient networkattchmentdefclientset.Interface UserDefinedNetworkClient userdefinednetworkclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface + NetworkQoSClient networkqosclientset.Interface } type OVNNodeClientset struct { @@ -134,6 +138,7 @@ type OVNClusterManagerClientset struct { UserDefinedNetworkClient userdefinednetworkclientset.Interface RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface + NetworkQoSClient networkqosclientset.Interface } const ( @@ -163,6 +168,7 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { UserDefinedNetworkClient: cs.UserDefinedNetworkClient, RouteAdvertisementsClient: cs.RouteAdvertisementsClient, FRRClient: cs.FRRClient, + NetworkQoSClient: cs.NetworkQoSClient, } } @@ -181,6 +187,7 @@ func (cs *OVNMasterClientset) GetOVNKubeControllerClientset() *OVNKubeController NetworkAttchDefClient: cs.NetworkAttchDefClient, UserDefinedNetworkClient: cs.UserDefinedNetworkClient, RouteAdvertisementsClient: cs.RouteAdvertisementsClient, + NetworkQoSClient: cs.NetworkQoSClient, } } @@ -199,6 +206,7 @@ func (cs *OVNClientset) GetOVNKubeControllerClientset() *OVNKubeControllerClient NetworkAttchDefClient: cs.NetworkAttchDefClient, UserDefinedNetworkClient: cs.UserDefinedNetworkClient, RouteAdvertisementsClient: cs.RouteAdvertisementsClient, + NetworkQoSClient: cs.NetworkQoSClient, } } @@ -218,6 +226,7 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset UserDefinedNetworkClient: cs.UserDefinedNetworkClient, RouteAdvertisementsClient: cs.RouteAdvertisementsClient, FRRClient: cs.FRRClient, + NetworkQoSClient: cs.NetworkQoSClient, } } @@ -522,6 +531,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + networkqosClientset, err := networkqosclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + return &OVNClientset{ KubeClient: kclientset, ANPClient: anpClientset, @@ -538,6 +552,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { UserDefinedNetworkClient: userDefinedNetworkClientSet, RouteAdvertisementsClient: routeAdvertisementsClientset, FRRClient: frrClientset, + NetworkQoSClient: networkqosClientset, }, nil } diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index e1819a2af3..eb9ac6380c 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -10,6 +10,7 @@ import ( "strings" iputils "github.com/containernetworking/plugins/pkg/ip" + "github.com/vishvananda/netlink" utilnet "k8s.io/utils/net" ) @@ -358,3 +359,33 @@ func 
IPNetsIPToStringSlice(ips []*net.IPNet) []string { func CalculateRouteTableID(ifIndex int) int { return ifIndex + RoutingTableIDStart } + +// RouteEqual compare two routes +func RouteEqual(l, r *netlink.Route) bool { + if (l == nil) != (r == nil) { + return false + } + if l == r { + return true + } + if !l.Equal(*r) { + return false + } + return l.Family == r.Family && + l.MTU == r.MTU && + l.Window == r.Window && + l.Rtt == r.Rtt && + l.RttVar == r.RttVar && + l.Ssthresh == r.Ssthresh && + l.Cwnd == r.Cwnd && + l.AdvMSS == r.AdvMSS && + l.Reordering == r.Reordering && + l.Hoplimit == r.Hoplimit && + l.InitCwnd == r.InitCwnd && + l.Features == r.Features && + l.RtoMin == r.RtoMin && + l.InitRwnd == r.InitRwnd && + l.QuickACK == r.QuickACK && + l.Congctl == r.Congctl && + l.FastOpenNoCookie == r.FastOpenNoCookie +} diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index 7d6e524048..d3be36f2db 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -151,6 +151,9 @@ const ( // "l2-network-b":"10"} // }", ovnUDNLayer2NodeGRLRPTunnelIDs = "k8s.ovn.org/udn-layer2-node-gateway-router-lrp-tunnel-ids" + + // ovnNodeEncapIPs is used to indicate encap IPs set on the node + OVNNodeEncapIPs = "k8s.ovn.org/node-encap-ips" ) type L3GatewayConfig struct { @@ -1474,3 +1477,27 @@ func filterIPVersion(cidrs []netip.Prefix, v6 bool) []netip.Prefix { } return validCIDRs } + +func SetNodeEncapIPs(nodeAnnotator kube.Annotator, encapips sets.Set[string]) error { + return nodeAnnotator.Set(OVNNodeEncapIPs, sets.List(encapips)) +} + +// ParseNodeEncapIPsAnnotation returns the encap IPs set on a node +func ParseNodeEncapIPsAnnotation(node *corev1.Node) ([]string, error) { + encapIPsAnnotation, ok := node.Annotations[OVNNodeEncapIPs] + if !ok { + return nil, newAnnotationNotSetError("%s annotation not found for node %q", OVNNodeEncapIPs, node.Name) + } + + var encapIPs []string + if err := json.Unmarshal([]byte(encapIPsAnnotation), &encapIPs); err != nil { + return nil, fmt.Errorf("failed to unmarshal %s annotation for node %q: %v", + encapIPsAnnotation, node.Name, err) + } + + return encapIPs, nil +} + +func NodeEncapIPsChanged(oldNode, newNode *corev1.Node) bool { + return oldNode.Annotations[OVNNodeEncapIPs] != newNode.Annotations[OVNNodeEncapIPs] +} diff --git a/helm/ovn-kubernetes/README.md b/helm/ovn-kubernetes/README.md index cc78130274..cdf89a04b5 100644 --- a/helm/ovn-kubernetes/README.md +++ b/helm/ovn-kubernetes/README.md @@ -342,6 +342,15 @@ false Configure to use user defined networks (UDN) feature with ovn-kubernetes + + global.enableNetworkQos + string +
+""
+
+ + Enables network QoS support from/to pods + global.enableMulticast string diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index ddab6b479e..2b6edcaa8e 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -153,6 +153,8 @@ spec: value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }} - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVN_V4_TRANSIT_SWITCH_SUBNET value: {{ default "" .Values.global.v4TransitSwitchSubnet | quote }} - name: OVN_V6_TRANSIT_SWITCH_SUBNET diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml index f8591de1a7..4a62d3e661 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml @@ -73,6 +73,7 @@ rules: - adminpolicybasedexternalroutes - egressfirewalls - egressqoses + - networkqoses - userdefinednetworks - clusteruserdefinednetworks verbs: [ "get", "list", "watch" ] @@ -80,6 +81,7 @@ rules: resources: - egressips - egressservices/status + - networkqoses/status - userdefinednetworks - userdefinednetworks/status - clusteruserdefinednetworks diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml index d06ee79a68..5e8a48a47a 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml @@ -274,6 +274,8 @@ spec: value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate true | quote }} - name: OVN_NOHOSTSUBNET_LABEL value: {{ default "k8s.ovn.org/ovn-managed=false" .Values.global.noHostSubnetLabel | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml index e742bbb5d4..7474c69f8f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml @@ -59,6 +59,7 @@ rules: resources: - namespaces - nodes + - nodes/status - pods - services - endpoints @@ -85,6 +86,7 @@ rules: - adminpolicybasedexternalroutes - userdefinednetworks - clusteruserdefinednetworks + - networkqoses verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.cni.cncf.io"] resources: @@ -109,6 +111,7 @@ rules: - egressfirewalls/status - egressips - egressqoses + - networkqoses - egressservices/status - 
adminpolicybasedexternalroutes/status - egressqoses/status @@ -117,6 +120,7 @@ rules: - clusteruserdefinednetworks - clusteruserdefinednetworks/status - clusteruserdefinednetworks/finalizers + - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: [""] resources: diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml index 123d5c3e01..6d7e840d01 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml @@ -198,6 +198,8 @@ spec: value: {{ default "" .Values.global.extGatewayNetworkInterface | quote }} - name: OVN_ENABLE_OVNKUBE_IDENTITY value: {{ hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVNKUBE_NODE_MODE value: "dpu-host" - name: OVNKUBE_NODE_MGMT_PORT_NETDEV diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml index 26be761fe7..9544653418 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml @@ -231,6 +231,8 @@ spec: value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }} - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVNKUBE_NODE_MODE value: "dpu" - name: OVN_HOST_NETWORK_NAMESPACE diff --git a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml index 7e58a260b1..e4b0a0621a 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml @@ -235,6 +235,8 @@ spec: value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} - name: OVNKUBE_NODE_MGMT_PORT_NETDEV value: {{ default "" .Values.global.nodeMgmtPortNetdev | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index f74726096f..d60276308b 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -441,6 +441,8 @@ spec: value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary 
.Values.global.enableObservability false | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} readinessProbe: exec: command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnkube-node"] diff --git a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index 30332a08d6..f692ed0524 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -343,6 +343,8 @@ spec: value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} - name: OVN_ENABLE_SVC_TEMPLATE_SUPPORT value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate true | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/helm/ovn-kubernetes/crds/k8s.ovn.org_networkqoses.yaml b/helm/ovn-kubernetes/crds/k8s.ovn.org_networkqoses.yaml new file mode 120000 index 0000000000..cb314342ba --- /dev/null +++ b/helm/ovn-kubernetes/crds/k8s.ovn.org_networkqoses.yaml @@ -0,0 +1 @@ +../../../dist/templates/k8s.ovn.org_networkqoses.yaml.j2 \ No newline at end of file diff --git a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml index 9f201060ec..850df1b518 100644 --- a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml +++ b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml @@ -162,6 +162,7 @@ rules: - egressfirewalls/status - adminpolicybasedexternalroutes/status - egressqoses/status + - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: ["policy.networking.k8s.io"] resources: @@ -183,6 +184,7 @@ rules: - adminpolicybasedexternalroutes - userdefinednetworks - clusteruserdefinednetworks + - networkqoses verbs: [ "get", "list", "watch" ] {{- if eq (hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true) true }} - apiGroups: ["certificates.k8s.io"] diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 716b4ebbea..2eef44ecae 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -68,6 +68,8 @@ global: enableEgressFirewall: true # -- Configure to use EgressQoS CRD feature with ovn-kubernetes enableEgressQos: true + # -- Enables network QoS support from/to pods + enableNetworkQos: false # -- Enables multicast support between the pods within the same namespace enableMulticast: "" # -- Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index bf2ad903c4..f643f81133 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -62,6 +62,8 @@ global: enableEgressFirewall: true # -- Configure to use EgressQoS CRD feature with ovn-kubernetes enableEgressQos: true + # -- Enables network QoS support from/to pods + enableNetworkQos: false # -- Enables multicast support between the pods within the same namespace 
enableMulticast: "" # -- Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index d5802f1f37..9747d45440 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -68,6 +68,8 @@ global: enableEgressFirewall: true # -- Configure to use EgressQoS CRD feature with ovn-kubernetes enableEgressQos: true + # -- Enables network QoS support from/to pods + enableNetworkQos: false # -- Enables multicast support between the pods within the same namespace enableMulticast: "" # -- Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes diff --git a/mkdocs.yml b/mkdocs.yml index ef081b915d..e21134af5a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -121,6 +121,7 @@ nav: - MultiNetworkPolicies: features/multiple-networks/multi-network-policies.md - MultiNetworkRails: features/multiple-networks/multi-vtep.md - Multicast: features/multicast.md + - NetworkQoS: features/network-qos.md - LiveMigration: features/live-migration.md - HybridOverlay: features/hybrid-overlay.md - Hardware Acceleration: @@ -138,5 +139,7 @@ nav: # - FeatureName: okeps/ - Template: okeps/okep-4368-template.md - Localnet API: okeps/okep-5085-localnet-api.md + - Network QoS: okeps/okep-4380-network-qos.md + - User Defined Networks: okeps/okep-5193-user-defined-networks.md - Blog: - blog/index.md diff --git a/test/Makefile b/test/Makefile index 4a543cb0c1..3602502471 100644 --- a/test/Makefile +++ b/test/Makefile @@ -3,14 +3,14 @@ JOB_NAME?="$@" # Make no assumptions about network, so run all tests. If on IPv4 or # IPv6 only network, set appropriately to skip related tests. 
-KIND_IPV4_SUPPORT?=false -KIND_IPV6_SUPPORT?=false +PLATFORM_IPV4_SUPPORT?=false +PLATFORM_IPV6_SUPPORT?=false DUALSTACK_CONVERSION?=false .PHONY: install-kind install-kind: - KIND_IPV4_SUPPORT=$(KIND_IPV4_SUPPORT) \ - KIND_IPV6_SUPPORT=$(KIND_IPV6_SUPPORT) \ + PLATFORM_IPV4_SUPPORT=$(PLATFORM_IPV4_SUPPORT) \ + PLATFORM_IPV6_SUPPORT=$(PLATFORM_IPV6_SUPPORT) \ ./scripts/install-kind.sh .PHONY : upgrade-ovn @@ -21,8 +21,8 @@ upgrade-ovn: shard-%: E2E_REPORT_DIR=$(E2E_REPORT_DIR) \ E2E_REPORT_PREFIX=$(JOB_NAME)_ \ - KIND_IPV4_SUPPORT=$(KIND_IPV4_SUPPORT) \ - KIND_IPV6_SUPPORT=$(KIND_IPV6_SUPPORT) \ + PLATFORM_IPV4_SUPPORT=$(PLATFORM_IPV4_SUPPORT) \ + PLATFORM_IPV6_SUPPORT=$(PLATFORM_IPV6_SUPPORT) \ DUALSTACK_CONVERSION=$(DUALSTACK_CONVERSION) \ SINGLE_NODE_CLUSTER=$(SINGLE_NODE_CLUSTER) \ ./scripts/e2e-kind.sh $@ $(WHAT) @@ -31,8 +31,8 @@ shard-%: control-plane: E2E_REPORT_DIR=$(E2E_REPORT_DIR) \ E2E_REPORT_PREFIX=$(JOB_NAME)_ \ - KIND_IPV4_SUPPORT=$(KIND_IPV4_SUPPORT) \ - KIND_IPV6_SUPPORT=$(KIND_IPV6_SUPPORT) \ + PLATFORM_IPV4_SUPPORT=$(PLATFORM_IPV4_SUPPORT) \ + PLATFORM_IPV6_SUPPORT=$(PLATFORM_IPV6_SUPPORT) \ OVN_HA=$(OVN_HA) \ ./scripts/e2e-cp.sh $(WHAT) diff --git a/test/e2e/containerengine/container_engine.go b/test/e2e/containerengine/container_engine.go new file mode 100644 index 0000000000..12d96829b2 --- /dev/null +++ b/test/e2e/containerengine/container_engine.go @@ -0,0 +1,42 @@ +package containerengine + +import ( + "fmt" + "os" + "strings" +) + +type ContainerEngine string + +func (ce ContainerEngine) String() string { + return string(ce) +} + +const ( + Docker ContainerEngine = "docker" + Podman ContainerEngine = "podman" +) + +var engine ContainerEngine + +func init() { + if cr, found := os.LookupEnv("CONTAINER_RUNTIME"); found { + switch strings.ToLower(cr) { + case Docker.String(): + engine = Docker + case Podman.String(): + engine = Podman + default: + panic(fmt.Sprintf("unknown container engine %q. Supported engines are docker or podman.", cr)) + } + } else { + engine = Docker + } +} + +func Get() ContainerEngine { + if engine.String() == "" { + panic("container engine is not set") + } + return engine +} diff --git a/test/e2e/deploymentconfig/README.MD b/test/e2e/deploymentconfig/README.MD new file mode 100644 index 0000000000..49ff839b43 --- /dev/null +++ b/test/e2e/deploymentconfig/README.MD @@ -0,0 +1,5 @@ +# Deployment Config + +## Description +Deployment Config contains platform configuration. +NB: Remove when OVN-Kubernetes provides an API to expose its API/Node config \ No newline at end of file diff --git a/test/e2e/deploymentconfig/api/api.go b/test/e2e/deploymentconfig/api/api.go new file mode 100644 index 0000000000..573ced8cb8 --- /dev/null +++ b/test/e2e/deploymentconfig/api/api.go @@ -0,0 +1,9 @@ +package api + +// DeploymentConfig offers visibility into the configuration OVN-Kubernetes environment for e2e test cases. This includes all host or node level config. +// Remove when OVN-Kubernetes exposes its config via an API. 
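// Usage sketch for the new accessor packages (hypothetical snippet; a KinD-backed
// deployment is assumed, matching the kind implementation in this patch):
//
//	deploymentconfig.Set()                                // detect the platform once, e.g. in suite setup
//	ns := deploymentconfig.Get().OVNKubernetesNamespace() // "ovn-kubernetes" on KinD
//	bridge := deploymentconfig.Get().ExternalBridgeName() // "breth0" on KinD
//	engine := containerengine.Get()                       // Docker unless CONTAINER_RUNTIME=podman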
+type DeploymentConfig interface { + OVNKubernetesNamespace() string + ExternalBridgeName() string + PrimaryInterfaceName() string +} diff --git a/test/e2e/deploymentconfig/config.go b/test/e2e/deploymentconfig/config.go new file mode 100644 index 0000000000..27c788f1b0 --- /dev/null +++ b/test/e2e/deploymentconfig/config.go @@ -0,0 +1,25 @@ +package deploymentconfig + +import ( + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/configs/kind" +) + +var deployment api.DeploymentConfig + +func Set() { + // upstream currently uses KinD as its preferred platform infra, so if we detect KinD, its upstream + if kind.IsKind() { + deployment = kind.New() + } + if deployment == nil { + panic("failed to determine the deployment config") + } +} + +func Get() api.DeploymentConfig { + if deployment == nil { + panic("deployment config type not set") + } + return deployment +} diff --git a/test/e2e/deploymentconfig/configs/kind/kind.go b/test/e2e/deploymentconfig/configs/kind/kind.go new file mode 100644 index 0000000000..be3f35aa73 --- /dev/null +++ b/test/e2e/deploymentconfig/configs/kind/kind.go @@ -0,0 +1,42 @@ +package kind + +import ( + "fmt" + "os/exec" + "strings" + + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig/api" +) + +func IsKind() bool { + _, err := exec.LookPath("kind") + if err != nil { + return false + } + outBytes, err := exec.Command("kind", "get", "clusters").CombinedOutput() + if err != nil { + panic(fmt.Sprintf("failed to get KinD clusters: stdout: %q, err: %v", string(outBytes), err)) + } + if strings.Contains(string(outBytes), "ovn") { + return true + } + return false +} + +type kind struct{} + +func New() api.DeploymentConfig { + return kind{} +} + +func (k kind) OVNKubernetesNamespace() string { + return "ovn-kubernetes" +} + +func (k kind) ExternalBridgeName() string { + return "breth0" +} + +func (k kind) PrimaryInterfaceName() string { + return "eth0" +} diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 014ea37d85..ac9bc8fb3b 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -7,7 +7,6 @@ import ( "fmt" "net" "net/http" - "os/exec" "path" "regexp" "strconv" @@ -18,6 +17,12 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/containerengine" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -35,6 +40,7 @@ import ( e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" testutils "k8s.io/kubernetes/test/utils" + kexec "k8s.io/utils/exec" utilnet "k8s.io/utils/net" ) @@ -43,12 +49,8 @@ const ( retryInterval = 1 * time.Second // polling interval timer retryTimeout = 40 * time.Second // polling timeout rolloutTimeout = 10 * time.Minute - agnhostImage = "registry.k8s.io/e2e-test-images/agnhost:2.26" - agnhostImageNew = "registry.k8s.io/e2e-test-images/agnhost:2.53" - iperf3Image = "quay.io/sronanrh/iperf" redirectIP = "123.123.123.123" redirectPort = "13337" - exContainerName = "tcp-continuous-client" defaultPodInterface = "eth0" udnPodInterface = "ovn-udn1" ) @@ -56,28 +58,24 @@ const ( type podCondition = func(pod *v1.Pod) (bool, error) // setupHostRedirectPod -func setupHostRedirectPod(f 
*framework.Framework, node *v1.Node, exContainerName string, isIPv6 bool) error { - _, _ = createClusterExternalContainer(exContainerName, externalContainerImage, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) - nodeV4, nodeV6 := getContainerAddressesForNetwork(node.Name, externalContainerNetwork) +func setupHostRedirectPod(f *framework.Framework, externalContainer infraapi.ExternalContainer, nodeName, nodeIP string, isIPv6 bool) error { mask := 32 ipCmd := []string{"ip"} - nodeIP := nodeV4 if isIPv6 { mask = 128 ipCmd = []string{"ip", "-6"} - nodeIP = nodeV6 } - cmd := []string{"docker", "exec", exContainerName} + cmd := []string{} cmd = append(cmd, ipCmd...) cmd = append(cmd, "route", "add", fmt.Sprintf("%s/%d", redirectIP, mask), "via", nodeIP) - _, err := runCommand(cmd...) + _, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, cmd) // cleanup not needed because containers persist for a single tests lifetime if err != nil { return err } // setup redirect iptables rule in node ipTablesArgs := []string{"PREROUTING", "-t", "nat", "--dst", redirectIP, "-j", "REDIRECT"} - updateIPTablesRulesForNode("insert", node.Name, ipTablesArgs, isIPv6) + updateIPTablesRulesForNode("insert", nodeName, ipTablesArgs, isIPv6) command := []string{ "bash", "-c", @@ -94,11 +92,11 @@ func setupHostRedirectPod(f *framework.Framework, node *v1.Node, exContainerName Containers: []v1.Container{ { Name: tcpServer, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, - NodeName: node.Name, + NodeName: nodeName, RestartPolicy: v1.RestartPolicyNever, HostNetwork: true, }, @@ -135,7 +133,7 @@ func checkContinuousConnectivity(f *framework.Framework, nodeName, podName, host Containers: []v1.Container{ { Name: contName, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, @@ -221,7 +219,7 @@ func checkConnectivityPingToHost(f *framework.Framework, nodeName, podName, host Containers: []v1.Container{ { Name: contName, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, Args: args, }, @@ -276,7 +274,7 @@ func getPodGWRoute(f *framework.Framework, nodeName string, podName string) net. Containers: []v1.Container{ { Name: contName, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, @@ -327,7 +325,7 @@ func createGenericPodWithLabel(f *framework.Framework, podName, nodeSelector, na return createPod(f, podName, nodeSelector, namespace, command, labels, options...) } -func createServiceForPodsWithLabel(f *framework.Framework, namespace string, servicePort int32, targetPort string, serviceType string, labels map[string]string) (string, error) { +func createServiceForPodsWithLabel(f *framework.Framework, namespace string, servicePort, targetPort uint16, serviceType string, labels map[string]string) (string, error) { service := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "service-for-pods", @@ -337,8 +335,8 @@ func createServiceForPodsWithLabel(f *framework.Framework, namespace string, ser Ports: []v1.ServicePort{ { Protocol: v1.ProtocolTCP, - TargetPort: intstr.Parse(targetPort), - Port: servicePort, + TargetPort: intstr.FromInt(int(targetPort)), + Port: int32(servicePort), }, }, Type: v1.ServiceType(serviceType), @@ -367,25 +365,31 @@ func createServiceForPodsWithLabel(f *framework.Framework, namespace string, ser // to each other. Not required to remove the iptables, because when we delete the network, the iptable rules will be removed. 
// Remove when this func when it is no longer experimental. // [1] https://docs.docker.com/config/daemon/ipv6/ -func isolateIPv6Networks(networkA, networkB string) error { - if containerRuntime != "docker" { +func isolateKinDIPv6Networks(networkA, networkB string) error { + if infraprovider.Get().Name() != "kind" { + // nothing to do + return nil + } + if containerengine.Get() != containerengine.Docker { panic("unsupported container runtime") } var bridgeInfNames []string // docker creates bridges by appending 12 chars from network ID to 'br-' bridgeIDLimit := 12 + exec := kexec.New() for _, network := range []string{networkA, networkB} { // output will be wrapped in single quotes - id, err := runCommand(containerRuntime, "inspect", network, "--format", "'{{.Id}}'") + idByte, err := exec.Command("docker", "inspect", network, "--format", "'{{.Id}}'").CombinedOutput() if err != nil { - return err + return fmt.Errorf("failed to inspect network %s: %v", network, err) } + id := string(idByte) if len(id) <= bridgeIDLimit+1 { return fmt.Errorf("invalid bridge ID %q", id) } bridgeInfName := fmt.Sprintf("br-%s", id[1:bridgeIDLimit+1]) // validate bridge exists - _, err = runCommand("ip", "link", "show", bridgeInfName) + _, err = exec.Command("ip", "link", "show", bridgeInfName).CombinedOutput() if err != nil { return fmt.Errorf("bridge %q doesnt exist: %v", bridgeInfName, err) } @@ -394,82 +398,54 @@ func isolateIPv6Networks(networkA, networkB string) error { if len(bridgeInfNames) != 2 { return fmt.Errorf("expected two bridge names but found %d", len(bridgeInfNames)) } - _, err := runCommand("sudo", "ip6tables", "-t", "filter", "-A", "FORWARD", "-i", bridgeInfNames[0], "-o", bridgeInfNames[1], "-j", "DROP") + _, err := exec.Command("sudo", "ip6tables", "-t", "filter", "-A", "FORWARD", "-i", bridgeInfNames[0], "-o", bridgeInfNames[1], "-j", "DROP").CombinedOutput() if err != nil { return err } - _, err = runCommand("sudo", "ip6tables", "-t", "filter", "-A", "FORWARD", "-i", bridgeInfNames[1], "-o", bridgeInfNames[0], "-j", "DROP") + _, err = exec.Command("sudo", "ip6tables", "-t", "filter", "-A", "FORWARD", "-i", bridgeInfNames[1], "-o", bridgeInfNames[0], "-j", "DROP").CombinedOutput() return err } -func createNetwork(networkName string, subnet string, v6 bool) { - args := []string{containerRuntime, "network", "create", "--internal", "--driver", "bridge", networkName, "--subnet", subnet} - if v6 { - args = append(args, "--ipv6") - } - _, err := runCommand(args...) - if err != nil && !strings.Contains(err.Error(), "already exists") { - framework.Failf("failed to create secondary network %q with subnet(s) %v: %v", networkName, subnet, err) - } -} - -func deleteNetwork(networkName string) { - args := []string{containerRuntime, "network", "rm", networkName} - _, err := runCommand(args...) - if err != nil && !strings.Contains(err.Error(), "not found") { - framework.Failf("failed to delete network %q: %v", networkName, err) - } -} - -func attachNetwork(networkName, containerName string) { - args := []string{containerRuntime, "network", "connect", networkName, containerName} - _, err := runCommand(args...) 
- if err != nil && !strings.Contains(err.Error(), "already exists") { - framework.Failf("failed to attach network %q to container %q: %v", networkName, containerName, err) +// forwardIPWithIPTables inserts iptables rules that accept traffic to and from the given IP and returns a cleanup function that removes them +func forwardIPWithIPTables(ip string) (func() error, error) { + isIPv6 := utilnet.IsIPv6String(ip) + ipTablesBin := "iptables" + if isIPv6 { + ipTablesBin = "ip6tables" } -} - -func detachNetwork(networkName, containerName string) { - args := []string{containerRuntime, "network", "disconnect", networkName, containerName} - _, err := runCommand(args...) - if err != nil { - framework.Failf("failed to attach network %q to container %q: %v", networkName, containerName, err) + mask := "/32" + if isIPv6 { + mask = "/128" } -} -func createClusterExternalContainer(containerName string, containerImage string, dockerArgs []string, entrypointArgs []string) (string, string) { - args := []string{containerRuntime, "run", "-itd"} - args = append(args, dockerArgs...) - args = append(args, []string{"--name", containerName, containerImage}...) - args = append(args, entrypointArgs...) - _, err := runCommand(args...) - if err != nil { - framework.Failf("failed to start external test container: %v", err) - } - ipv4, err := runCommand(containerRuntime, "inspect", "-f", "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}", containerName) - if err != nil { - framework.Failf("failed to inspect external test container for its IP: %v", err) + var cleanUpFns []func() error + cleanUp := func() error { + var errs []error + for _, cleanUpFn := range cleanUpFns { + if err := cleanUpFn(); err != nil { + errs = append(errs, err) + } + } + return utilerrors.NewAggregate(errs) } - ipv6, err := runCommand(containerRuntime, "inspect", "-f", "{{range .NetworkSettings.Networks}}{{.GlobalIPv6Address}}{{end}}", containerName) + exec := kexec.New() + _, err := exec.Command("sudo", ipTablesBin, "-I", "FORWARD", "-s", ip+mask, "-j", "ACCEPT").CombinedOutput() if err != nil { - framework.Failf("failed to inspect external test container for its IP (v6): %v", err) + return cleanUp, fmt.Errorf("failed to insert rule to forward IP %q: %w", ip+mask, err) } - if ipv4 == "" && ipv6 == "" { - framework.Failf("failed to get IPv4 or IPv6 address for container %s", containerName) - } - return strings.Trim(ipv4, "\n"), strings.Trim(ipv6, "\n") -} - -func deleteClusterExternalContainer(containerName string) { - _, err := runCommand(containerRuntime, "rm", "-f", containerName) + cleanUpFns = append(cleanUpFns, func() error { + exec.Command("sudo", ipTablesBin, "-D", "FORWARD", "-s", ip+mask, "-j", "ACCEPT").CombinedOutput() + return nil + }) + _, err = exec.Command("sudo", ipTablesBin, "-I", "FORWARD", "-d", ip+mask, "-j", "ACCEPT").CombinedOutput() if err != nil { - framework.Failf("failed to delete external test container, err: %v", err) + return cleanUp, fmt.Errorf("failed to insert rule to forward IP %q: %w", ip+mask, err) } - gomega.Eventually(func() string { - output, err := runCommand(containerRuntime, "ps", "-f", fmt.Sprintf("name=%s", containerName), "-q") - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - return output - }, 5).Should(gomega.HaveLen(0)) + cleanUpFns = append(cleanUpFns, func() error { + exec.Command("sudo", ipTablesBin, "-D", "FORWARD", "-d", ip+mask, "-j", "ACCEPT").CombinedOutput() + return nil + }) + return cleanUp, nil } // updatesNamespace labels while preserving the required UDN label @@ -515,7 +491,7 @@ func createPod(f
*framework.Framework, podName, nodeSelector, namespace string, Containers: []v1.Container{ { Name: contName, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, @@ -594,15 +570,6 @@ func IsGatewayModeLocal() bool { return isLocal } -// runCommand runs the cmd and returns the combined stdout and stderr -func runCommand(cmd ...string) (string, error) { - output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() - if err != nil { - return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output) - } - return string(output), nil -} - // restartOVNKubeNodePod restarts the ovnkube-node pod from namespace, running on nodeName func restartOVNKubeNodePod(clientset kubernetes.Interface, namespace string, nodeName string) error { ovnKubeNodePods, err := clientset.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{ @@ -656,7 +623,7 @@ func restartOVNKubeNodePodsInParallel(clientset kubernetes.Interface, namespace for _, n := range nodeNames { nodeName := n restartFuncs = append(restartFuncs, func() error { - return restartOVNKubeNodePod(clientset, ovnNamespace, nodeName) + return restartOVNKubeNodePod(clientset, namespace, nodeName) }) } @@ -665,7 +632,7 @@ func restartOVNKubeNodePodsInParallel(clientset kubernetes.Interface, namespace // getOVNKubePodLogsFiltered retrieves logs from ovnkube-node pods and filters logs lines according to filteringRegexp func getOVNKubePodLogsFiltered(clientset kubernetes.Interface, namespace, nodeName, filteringRegexp string) (string, error) { - ovnKubeNodePods, err := clientset.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{ + ovnKubeNodePods, err := clientset.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "name=ovnkube-node", FieldSelector: "spec.nodeName=" + nodeName, }) @@ -673,10 +640,10 @@ func getOVNKubePodLogsFiltered(clientset kubernetes.Interface, namespace, nodeNa return "", fmt.Errorf("getOVNKubePodLogsFiltered: error while getting ovnkube-node pods: %w", err) } - logs, err := e2epod.GetPodLogs(context.TODO(), clientset, ovnNamespace, ovnKubeNodePods.Items[0].Name, getNodeContainerName()) + logs, err := e2epod.GetPodLogs(context.TODO(), clientset, namespace, ovnKubeNodePods.Items[0].Name, getNodeContainerName()) if err != nil { return "", fmt.Errorf("getOVNKubePodLogsFiltered: error while getting ovnkube-node [%s/%s] logs: %w", - ovnNamespace, ovnKubeNodePods.Items[0].Name, err) + namespace, ovnKubeNodePods.Items[0].Name, err) } scanner := bufio.NewScanner(strings.NewReader(logs)) @@ -697,13 +664,13 @@ func getOVNKubePodLogsFiltered(clientset kubernetes.Interface, namespace, nodeNa return filteredLogs, nil } -func findOvnKubeControlPlaneNode(controlPlanePodName, leaseName string) (string, error) { +func findOvnKubeControlPlaneNode(namespace, controlPlanePodName, leaseName string) (string, error) { - ovnkubeControlPlaneNode, err := e2ekubectl.RunKubectl(ovnNamespace, "get", "leases", leaseName, + ovnkubeControlPlaneNode, err := e2ekubectl.RunKubectl(namespace, "get", "leases", leaseName, "-o", "jsonpath='{.spec.holderIdentity}'") framework.ExpectNoError(err, fmt.Sprintf("Unable to retrieve leases (%s)"+ - "from %s %v", leaseName, ovnNamespace, err)) + "from %s %v", leaseName, namespace, err)) framework.Logf(fmt.Sprintf("master instance of %s is running on node %s", controlPlanePodName, ovnkubeControlPlaneNode)) // Strip leading and trailing quotes if present @@ -720,19 +687,41 @@ var _ = ginkgo.Describe("e2e control plane", 
func() { f := wrappedTestFramework(svcname) var ( - extDNSIP string - numControlPlanePods int - controlPlanePodName string - controlPlaneLeaseName string - nodes []v1.Node + extDNSIP string + numControlPlanePods int + controlPlanePodName string + controlPlaneLeaseName string + providerCtx infraapi.Context + secondaryProviderNetwork infraapi.Network + secondaryExternalContainer infraapi.ExternalContainer ) ginkgo.BeforeEach(func() { + var err error + providerCtx = infraprovider.Get().NewTestContext() + secondaryProviderNetwork, err = providerCtx.CreateNetwork(secondaryNetworkName, secondaryIPV4Subnet) + framework.ExpectNoError(err, "must get secondary network") + ginkgo.DeferCleanup(func() error { + return providerCtx.DeleteNetwork(secondaryProviderNetwork) + }) + nodeList, err := f.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + framework.ExpectNoError(err, "must list all Nodes") + ginkgo.By("attach secondary proivider network to all Nodes") + for _, node := range nodeList.Items { + _, err = providerCtx.AttachNetwork(secondaryProviderNetwork, node.Name) + framework.ExpectNoError(err, "network %s must attach to node %s", secondaryProviderNetwork.Name(), node.Name) + } + secondaryExternalContainerPort := infraprovider.Get().GetExternalContainerPort() + secondaryExternalContainerSpec := infraapi.ExternalContainer{Name: "e2e-ovn-k", Image: images.AgnHost(), + Network: secondaryProviderNetwork, Args: getAgnHostHTTPPortBindCMDArgs(secondaryExternalContainerPort), ExtPort: secondaryExternalContainerPort} + ginkgo.By("creating container on secondary provider network") + secondaryExternalContainer, err = providerCtx.CreateExternalContainer(secondaryExternalContainerSpec) + framework.ExpectNoError(err, "failed to create external container") // Assert basic external connectivity. // Since this is not really a test of kubernetes in any way, we // leave it as a pre-test assertion, rather than a Ginko test. 
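// Note (inferred from the redirect test below): the secondary provider network attached
// above gives every node and the external client container a shared segment, and the test
// later routes the placeholder redirectIP through the target node's address on that
// network before checking conntrack entries across an ovnkube-node restart.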
ginkgo.By("Executing a successful http request from the external internet") - _, err := http.Get("http://google.com") + _, err = http.Get("http://google.com") if err != nil { framework.Failf("Unable to connect/talk to the internet: %v", err) } @@ -750,7 +739,7 @@ var _ = ginkgo.Describe("e2e control plane", func() { controlPlaneLeaseName = "ovn-kubernetes-master" } - controlPlanePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{ + controlPlanePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.Background(), metav1.ListOptions{ LabelSelector: "name=" + controlPlanePodName, }) framework.ExpectNoError(err) @@ -759,14 +748,6 @@ var _ = ginkgo.Describe("e2e control plane", func() { if IsIPv6Cluster(f.ClientSet) { extDNSIP = "2001:4860:4860::8888" } - n, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) - framework.ExpectNoError(err) - nodes = n.Items - - }) - - ginkgo.AfterEach(func() { - deleteClusterExternalContainer(exContainerName) }) ginkgo.It("should provide Internet connection continuously when ovnkube-node pod is killed", func() { @@ -782,31 +763,41 @@ var _ = ginkgo.Describe("e2e control plane", func() { framework.ExpectNoError(err) testPod := <-podChan - nodeName := testPod.Spec.NodeName - framework.Logf("Test pod running on %q", nodeName) - var targetNode *v1.Node - for _, node := range nodes { - if node.Name == nodeName { - targetNode = &node - } + targetNodeName := testPod.Spec.NodeName + targetNodeInterface, err := infraprovider.Get().GetK8NodeNetworkInterface(targetNodeName, secondaryProviderNetwork) + framework.ExpectNoError(err, "must get Node %s address for network %s", targetNodeName, secondaryProviderNetwork.Name()) + targetNodeIP := targetNodeInterface.IPv4 + if IsIPv6Cluster(f.ClientSet) { + targetNodeIP = targetNodeInterface.IPv6 } - gomega.Expect(targetNode).ToNot(gomega.BeNil()) - err = setupHostRedirectPod(f, targetNode, exContainerName, IsIPv6Cluster(f.ClientSet)) + gomega.Expect(targetNodeIP).NotTo(gomega.BeEmpty(), "unable to find Node IP for secondary network") + framework.Logf("Target node is %q and IP is %q", targetNodeName, targetNodeIP) + err = setupHostRedirectPod(f, secondaryExternalContainer, targetNodeName, targetNodeIP, IsIPv6Cluster(f.ClientSet)) framework.ExpectNoError(err) + cleanUp, err := forwardIPWithIPTables(redirectIP) + ginkgo.DeferCleanup(cleanUp) + // start TCP client go func() { defer ginkgo.GinkgoRecover() - _, _ = runCommand(containerRuntime, "exec", exContainerName, "nc", "--idle-timeout", "120s", redirectIP, redirectPort) + out, err := infraprovider.Get().ExecExternalContainerCommand(secondaryExternalContainer, []string{"nc", redirectIP, redirectPort}) + if err != nil { + framework.Logf("external container %s exited with error: %q, stdout: %q", secondaryExternalContainer.Name, err, out) + } + if out != "" { + framework.Logf("external container %s exisited with stdout: %q", secondaryExternalContainer.Name, out) + } + framework.Logf("external container with TCP client exited") }() ginkgo.By("Checking that TCP redirect connection entry in conntrack before ovnkube-node restart") gomega.Eventually(func() int { - return pokeConntrackEntries(nodeName, redirectIP, "tcp", nil) + return pokeConntrackEntries(targetNodeName, redirectIP, "tcp", nil) }, "10s", "1s").ShouldNot(gomega.Equal(0)) - ginkgo.By("Deleting ovn-kube pod on node " + nodeName) - err = restartOVNKubeNodePod(f.ClientSet, ovnNamespace, nodeName) + 
ginkgo.By("Deleting ovn-kube pod on node " + targetNodeName) + err = restartOVNKubeNodePod(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), targetNodeName) framework.ExpectNoError(err) ginkgo.By("Ensuring there were no connectivity errors") @@ -817,14 +808,14 @@ var _ = ginkgo.Describe("e2e control plane", func() { ginkgo.By("Checking that TCP redirect connection entry in conntrack remained after ovnkube-node restart") gomega.Consistently(func() int { - return pokeConntrackEntries(nodeName, redirectIP, "tcp", nil) + return pokeConntrackEntries(targetNodeName, redirectIP, "tcp", nil) }, "5s", "500ms").ShouldNot(gomega.Equal(0)) }) ginkgo.It("should provide Internet connection continuously when pod running master instance of ovnkube-control-plane is killed", func() { ginkgo.By(fmt.Sprintf("Running container which tries to connect to %s in a loop", extDNSIP)) - ovnKubeControlPlaneNode, err := findOvnKubeControlPlaneNode(controlPlanePodName, controlPlaneLeaseName) + ovnKubeControlPlaneNode, err := findOvnKubeControlPlaneNode(deploymentconfig.Get().OVNKubernetesNamespace(), controlPlanePodName, controlPlaneLeaseName) framework.ExpectNoError(err, fmt.Sprintf("unable to find current master of %s cluster %v", controlPlanePodName, err)) podChan, errChan := make(chan *v1.Pod), make(chan error) go func() { @@ -839,8 +830,8 @@ var _ = ginkgo.Describe("e2e control plane", func() { framework.Logf("Test pod running on %q", testPod.Spec.NodeName) time.Sleep(5 * time.Second) - - podClient := f.ClientSet.CoreV1().Pods(ovnNamespace) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + podClient := f.ClientSet.CoreV1().Pods(ovnKubeNamespace) podList, err := podClient.List(context.Background(), metav1.ListOptions{ LabelSelector: "name=" + controlPlanePodName, @@ -856,7 +847,7 @@ var _ = ginkgo.Describe("e2e control plane", func() { } ginkgo.By("Deleting ovnkube control plane pod " + podName) - deletePodWithWaitByName(context.TODO(), f.ClientSet, podName, ovnNamespace) + e2epod.DeletePodWithWaitByName(context.TODO(), f.ClientSet, podName, ovnKubeNamespace) framework.Logf("Deleted ovnkube control plane pod %q", podName) ginkgo.By("Ensuring there were no connectivity errors") @@ -868,8 +859,8 @@ var _ = ginkgo.Describe("e2e control plane", func() { ginkgo.It("should provide Internet connection continuously when all pods are killed on node running master instance of ovnkube-control-plane", func() { ginkgo.By(fmt.Sprintf("Running container which tries to connect to %s in a loop", extDNSIP)) - - ovnKubeControlPlaneNode, err := findOvnKubeControlPlaneNode(controlPlanePodName, controlPlaneLeaseName) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + ovnKubeControlPlaneNode, err := findOvnKubeControlPlaneNode(ovnKubeNamespace, controlPlanePodName, controlPlaneLeaseName) framework.ExpectNoError(err, fmt.Sprintf("unable to find current master of %s cluster %v", controlPlanePodName, err)) podChan, errChan := make(chan *v1.Pod), make(chan error) @@ -902,8 +893,7 @@ var _ = ginkgo.Describe("e2e control plane", func() { !strings.HasPrefix(pod.Name, "ovnkube-identity") && !strings.HasPrefix(pod.Name, "ovs-node") { framework.Logf("%q", pod.Namespace) - err = deletePodWithWaitByName(context.TODO(), f.ClientSet, pod.Name, ovnNamespace) - framework.ExpectNoError(err, fmt.Sprintf("failed to delete pod %s", pod.Name)) + deletePodWithWaitByName(context.Background(), f.ClientSet, pod.GetName(), ovnKubeNamespace) framework.Logf("Deleted control plane pod %q", pod.Name) } } @@ -936,7 
+926,7 @@ var _ = ginkgo.Describe("e2e control plane", func() { for _, pod := range podList.Items { if strings.HasPrefix(pod.Name, controlPlanePodName) && !strings.HasPrefix(pod.Name, "ovs-node") { framework.Logf("%q", pod.Namespace) - err = deletePodWithWaitByName(context.TODO(), f.ClientSet, pod.Name, ovnNamespace) + err = deletePodWithWaitByName(context.TODO(), f.ClientSet, pod.Name, deploymentconfig.Get().OVNKubernetesNamespace()) framework.ExpectNoError(err, fmt.Sprintf("failed to delete pod %s", pod.Name)) framework.Logf("Deleted control plane pod %q", pod.Name) } @@ -968,12 +958,15 @@ var _ = ginkgo.Describe("e2e control plane", func() { }) ginkgo.Describe("test node readiness according to its defaults interface MTU size", func() { - const testNodeName = "ovn-worker" + var testNodeName string var originalMTU int ginkgo.BeforeEach(func() { + node, err := e2enode.GetRandomReadySchedulableNode(context.Background(), f.ClientSet) + framework.ExpectNoError(err, "must get a schedulable Node") + testNodeName = node.GetName() // get the interface current mtu and store it as original value to be able to reset it after the test - res, err := runCommand(containerRuntime, "exec", testNodeName, "cat", "/sys/class/net/breth0/mtu") + res, err := infraprovider.Get().ExecK8NodeCommand(testNodeName, []string{"cat", fmt.Sprintf("/sys/class/net/%s/mtu", deploymentconfig.Get().ExternalBridgeName())}) if err != nil { framework.Failf("could not get MTU of interface: %s", err) } @@ -987,13 +980,13 @@ var _ = ginkgo.Describe("e2e control plane", func() { ginkgo.AfterEach(func() { // reset MTU to original value - _, err := runCommand(containerRuntime, "exec", testNodeName, "ip", "link", "set", "breth0", "mtu", fmt.Sprintf("%d", originalMTU)) + _, err := infraprovider.Get().ExecK8NodeCommand(testNodeName, []string{"ip", "link", "set", deploymentconfig.Get().ExternalBridgeName(), "mtu", fmt.Sprintf("%d", originalMTU)}) if err != nil { framework.Failf("could not reset MTU of interface: %s", err) } // restart ovnkube-node pod - if err := restartOVNKubeNodePod(f.ClientSet, ovnNamespace, testNodeName); err != nil { + if err := restartOVNKubeNodePod(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), testNodeName); err != nil { framework.Failf("could not restart ovnkube-node pod: %s", err) } @@ -1003,13 +996,13 @@ var _ = ginkgo.Describe("e2e control plane", func() { ginkgo.It("should get node not ready with a too small MTU", func() { // set the defaults interface MTU very low - _, err := runCommand(containerRuntime, "exec", testNodeName, "ip", "link", "set", "breth0", "mtu", "1000") + _, err := infraprovider.Get().ExecK8NodeCommand(testNodeName, []string{"ip", "link", "set", deploymentconfig.Get().ExternalBridgeName(), "mtu", "1000"}) if err != nil { framework.Failf("could not set MTU of interface: %s", err) } // restart ovnkube-node pod to trigger mtu validation - if err := restartOVNKubeNodePod(f.ClientSet, ovnNamespace, testNodeName); err == nil || err != wait.ErrWaitTimeout { + if err := restartOVNKubeNodePod(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), testNodeName); err == nil || err != wait.ErrWaitTimeout { if err == nil { framework.Failf("ovnkube-node pod restarted correctly, but wasn't supposed to: %s", err) } @@ -1027,13 +1020,13 @@ var _ = ginkgo.Describe("e2e control plane", func() { ginkgo.It("should get node ready with a big enough MTU", func() { // set the defaults interface MTU big enough - _, err := runCommand(containerRuntime, "exec", testNodeName, "ip", "link", "set", 
"breth0", "mtu", "2000") + _, err := infraprovider.Get().ExecK8NodeCommand(testNodeName, []string{"ip", "link", "set", deploymentconfig.Get().ExternalBridgeName(), "mtu", "2000"}) if err != nil { framework.Failf("could not set MTU of interface: %s", err) } // restart ovnkube-node pod to trigger mtu validation - if err := restartOVNKubeNodePod(f.ClientSet, ovnNamespace, testNodeName); err != nil { + if err := restartOVNKubeNodePod(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), testNodeName); err != nil { framework.Failf("could not restart ovnkube-node pod: %s", err) } @@ -1051,13 +1044,12 @@ var _ = ginkgo.Describe("e2e control plane", func() { // Test pod connectivity to other host IP addresses var _ = ginkgo.Describe("test e2e pod connectivity to host addresses", func() { - const ( - ovnWorkerNode string = "ovn-worker" - svcname string = "node-e2e-to-host" - ) + const svcname string = "node-e2e-to-host" + var ( - targetIP string - singleIPMask string + targetIP string + singleIPMask string + workerNodeName string ) f := wrappedTestFramework(svcname) @@ -1069,32 +1061,36 @@ var _ = ginkgo.Describe("test e2e pod connectivity to host addresses", func() { targetIP = "2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF" singleIPMask = "128" } + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 1) + framework.ExpectNoError(err) + if len(nodes.Items) < 1 { + framework.Failf("Test requires >= 1 Ready nodes, but there are only %v nodes", len(nodes.Items)) + } + workerNodeName = nodes.Items[0].Name // Add another IP address to the worker - _, err := runCommand(containerRuntime, "exec", ovnWorkerNode, "ip", "a", "add", - fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", "breth0") - framework.ExpectNoError(err, "failed to add IP to %s", ovnWorkerNode) + _, err = infraprovider.Get().ExecK8NodeCommand(workerNodeName, []string{"ip", "a", "add", + fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName()}) + framework.ExpectNoError(err, "failed to add IP to %s", workerNodeName) }) ginkgo.AfterEach(func() { - _, err := runCommand(containerRuntime, "exec", ovnWorkerNode, "ip", "a", "del", - fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", "breth0") - framework.ExpectNoError(err, "failed to remove IP from %s", ovnWorkerNode) + _, err := infraprovider.Get().ExecK8NodeCommand(workerNodeName, []string{"ip", "a", "del", + fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName()}) + framework.ExpectNoError(err, "failed to remove IP from %s", workerNodeName) }) ginkgo.It("Should validate connectivity from a pod to a non-node host address on same node", func() { // Spin up another pod that attempts to reach the previously started pod on separate nodes framework.ExpectNoError( - checkConnectivityPingToHost(f, ovnWorkerNode, "e2e-src-ping-pod", targetIP, ipv4PingCommand, 30)) + checkConnectivityPingToHost(f, workerNodeName, "e2e-src-ping-pod", targetIP, ipv4PingCommand, 30)) }) }) // Test e2e inter-node connectivity over br-int var _ = ginkgo.Describe("test e2e inter-node connectivity between worker nodes", func() { const ( - svcname string = "inter-node-e2e" - ovnWorkerNode string = "ovn-worker" - ovnWorkerNode2 string = "ovn-worker2" - getPodIPRetry int = 20 + svcname string = "inter-node-e2e" + getPodIPRetry int = 20 ) f := wrappedTestFramework(svcname) @@ -1107,8 +1103,13 @@ var _ = ginkgo.Describe("test e2e inter-node connectivity between worker nodes", dstPingPodName := "e2e-dst-ping-pod" 
command := []string{"bash", "-c", "sleep 20000"} // non-ha ci mode runs a named set of nodes with a prefix of ovn-worker - ciWorkerNodeSrc = ovnWorkerNode - ciWorkerNodeDst = ovnWorkerNode2 + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err) + if len(nodes.Items) < 2 { + framework.Failf("Test requires >= 2 Ready nodes, but there are only %v nodes", len(nodes.Items)) + } + ciWorkerNodeSrc = nodes.Items[0].Name + ciWorkerNodeDst = nodes.Items[1].Name ginkgo.By(fmt.Sprintf("Creating a container on node %s and verifying connectivity to a pod on node %s", ciWorkerNodeSrc, ciWorkerNodeDst)) @@ -1161,8 +1162,8 @@ func createSrcPod(podName, nodeName string, ipCheckInterval, ipCheckTimeout time var _ = ginkgo.Describe("e2e network policy hairpinning validation", func() { const ( svcName string = "network-policy" - serviceHTTPPort = 6666 - endpointHTTPPort = "80" + serviceHTTPPort uint16 = 6666 + endpointHTTPPort uint16 = 80 ) f := wrappedTestFramework(svcName) @@ -1176,7 +1177,7 @@ var _ = ginkgo.Describe("e2e network policy hairpinning validation", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By("creating pods") - cmd := []string{"/bin/bash", "-c", fmt.Sprintf("/agnhost netexec --http-port %s", endpointHTTPPort)} + cmd := getAgnHostHTTPPortBindFullCMD(endpointHTTPPort) // pod1 is a client and a service backend for hairpinned traffic pod1 := newAgnhostPod(namespaceName, "pod1", cmd...) pod1.Labels = hairpinPodSel @@ -1193,7 +1194,7 @@ var _ = ginkgo.Describe("e2e network policy hairpinning validation", func() { framework.ExpectNoError(err, fmt.Sprintf("ClusterIP svc never had an endpoint, expected 1: %v", err)) ginkgo.By("verify hairpinned connection from a pod to its own service is allowed") - hostname := pokeEndpoint(namespaceName, pod1.Name, "http", svcIP, serviceHTTPPort, "hostname") + hostname := pokeEndpointViaPod(f, namespaceName, pod1.Name, svcIP, serviceHTTPPort, "hostname") gomega.Expect(hostname).To(gomega.Equal(pod1.Name), fmt.Sprintf("returned client: %v was not correct", hostname)) ginkgo.By("verify connection to another pod is denied") @@ -1205,13 +1206,12 @@ var _ = ginkgo.Describe("e2e network policy hairpinning validation", func() { var _ = ginkgo.Describe("e2e ingress traffic validation", func() { const ( - endpointHTTPPort = 80 - endpointUDPPort = 90 - clusterHTTPPort = 81 - clusterHTTPPort2 = 82 - clusterUDPPort = 91 - clusterUDPPort2 = 92 - clientContainerName = "npclient" + endpointHTTPPort = 80 + endpointUDPPort = 90 + clusterHTTPPort = 81 + clusterHTTPPort2 = 82 + clusterUDPPort = 91 + clusterUDPPort2 = 92 ) f := wrappedTestFramework("nodeport-ingress-test") @@ -1219,14 +1219,19 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { var endPoints []*v1.Pod var nodesHostnames sets.String - maxTries := 0 + var maxTries int var nodes *v1.NodeList var newNodeAddresses []string - var externalIpv4 string - var externalIpv6 string + var providerCtx infraapi.Context var isDualStack bool + ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() + }) + ginkgo.Context("Validating ingress traffic", func() { + var externalContainer infraapi.ExternalContainer + ginkgo.BeforeEach(func() { endPoints = make([]*v1.Pod, 0) nodesHostnames = sets.NewString() @@ -1267,11 +1272,13 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { // the client uses the netexec command from the agnhost image, which is able to receive commands for poking other // 
addresses. // CAP NET_ADMIN is needed to remove neighbor entries for ARP/NS flap tests - externalIpv4, externalIpv6 = createClusterExternalContainer(clientContainerName, agnhostImage, []string{"--network", "kind", "-P", "--cap-add", "NET_ADMIN"}, []string{"netexec", "--http-port=80"}) - }) - - ginkgo.AfterEach(func() { - deleteClusterExternalContainer(clientContainerName) + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer = infraapi.ExternalContainer{Name: "e2e-ingress", Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to create external service", externalContainer.String()) }) // This test validates ingress traffic to nodeports. @@ -1312,7 +1319,7 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { ginkgo.By("Hitting the nodeport on " + node.Name + " and reaching all the endpoints " + protocol) for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, nodeAddress.Address, nodePort, "hostname") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, nodeAddress.Address, nodePort, "hostname") responses.Insert(epHostname) // each endpoint returns its hostname. By doing this, we validate that each ep was reached at least once. @@ -1412,24 +1419,11 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { time.Sleep(5 * time.Second) // Test all node IPv4 addresses http and return true if all of them come back with a valid answer. - ipPort := net.JoinHostPort("localhost", "80") for _, ipAddresses := range ipv4Addresses { for _, targetHost := range ipAddresses { - cmd := []string{containerRuntime, "exec", clientContainerName} - curlCommand := strings.Split(fmt.Sprintf("curl --max-time 2 -g -q -s http://%s/dial?request=hostname&protocol=http&host=%s&port=%d&tries=1", - ipPort, - targetHost, - protocolPorts["http"]), " ") - cmd = append(cmd, curlCommand...) - framework.Logf("Running command %v", cmd) - res, err := runCommand(cmd...) - if err != nil { - framework.Logf("Failed, res: %v, err: %v", res, err) - return false - } - res, err = parseNetexecResponse(res) - if err != nil { - framework.Logf("Failed, res: %v, err: %v", res, err) + hostname := pokeEndpointViaExternalContainer(externalContainer, "http", targetHost, protocolPorts["http"], "hostname") + if hostname == "" { + framework.Logf("Failed, could get hostname") return false } } @@ -1464,7 +1458,7 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { responses := sets.NewString() valid := false for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, address, port, "hostname") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, address, port, "hostname") responses.Insert(epHostname) // each endpoint returns its hostname. By doing this, we validate that each ep was reached at least once. 
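// For orientation, the external-client pattern used across these validation hunks
// condenses to the following sketch (names illustrative, error handling elided):
//
//	net, _ := infraprovider.Get().PrimaryNetwork()
//	port := infraprovider.Get().GetExternalContainerPort()
//	client := infraapi.ExternalContainer{Name: "e2e-client", Image: images.AgnHost(),
//		Network: net, Args: getAgnHostHTTPPortBindCMDArgs(port), ExtPort: port}
//	client, _ = providerCtx.CreateExternalContainer(client)
//	hostname := pokeEndpointViaExternalContainer(client, "http", nodeIP, nodePort, "hostname")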
@@ -1517,9 +1511,9 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { expectedResponses := sets.NewString() if utilnet.IsIPv6String(nodeAddress.Address) { - expectedResponses.Insert(node.Name+"-ep", externalIpv6) + expectedResponses.Insert(node.Name+"-ep", externalContainer.GetIPv6()) } else { - expectedResponses.Insert(node.Name+"-ep", externalIpv4) + expectedResponses.Insert(node.Name+"-ep", externalContainer.GetIPv4()) } valid := false @@ -1531,8 +1525,8 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { ginkgo.By("Hitting the nodeport on " + node.Name + " and trying to reach only the local endpoint with protocol " + protocol) for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, nodeAddress.Address, nodePort, "hostname") - epClientIP := pokeEndpoint("", clientContainerName, protocol, nodeAddress.Address, nodePort, "clientip") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, nodeAddress.Address, nodePort, "hostname") + epClientIP := pokeEndpointViaExternalContainer(externalContainer, protocol, nodeAddress.Address, nodePort, "clientip") epClientIP, _, err = net.SplitHostPort(epClientIP) framework.ExpectNoError(err, "failed to parse client ip:port") responses.Insert(epHostname, epClientIP) @@ -1590,8 +1584,8 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { for _, externalAddress := range addresses { ginkgo.By(fmt.Sprintf("Making sure that the neighbor entry is stable for endpoint IP %s", externalAddress)) - valid := isNeighborEntryStable(clientContainerName, externalAddress, 10) - gomega.Expect(valid).To(gomega.Equal(true), "Validation failed for neighbor entry of external address: %s", externalAddress) + valid := isNeighborEntryStable(externalContainer, externalAddress, 10) + gomega.Expect(valid).Should(gomega.BeTrue(), "Validation failed for neighbor entry of external address: %s", externalAddress) for _, protocol := range []string{"http", "udp"} { externalPort := int32(clusterHTTPPort) @@ -1603,8 +1597,8 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { externalAddress, protocol, externalPort)) - valid = pokeExternalIpService(clientContainerName, protocol, externalAddress, externalPort, maxTries, nodesHostnames) - gomega.Expect(valid).To(gomega.Equal(true), "Validation failed for external address: %s", externalAddress) + valid = pokeExternalIpService(externalContainer, protocol, externalAddress, externalPort, maxTries, nodesHostnames) + gomega.Expect(valid).Should(gomega.BeTrue(), "Validation failed for external address: %s", externalAddress) } } @@ -1615,8 +1609,8 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { for _, externalAddress := range addresses { ginkgo.By(fmt.Sprintf("Making sure that the neighbor entry is stable for endpoint IP %s", externalAddress)) - valid := isNeighborEntryStable(clientContainerName, externalAddress, 10) - gomega.Expect(valid).To(gomega.Equal(true), "Validation failed for neighbor entry of external address: %s", externalAddress) + valid := isNeighborEntryStable(externalContainer, externalAddress, 10) + gomega.Expect(valid).Should(gomega.BeTrue(), "Validation failed for neighbor entry of external address: %s", externalAddress) for _, protocol := range []string{"http", "udp"} { externalPort := int32(clusterHTTPPort2) @@ -1628,14 +1622,16 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { externalAddress, protocol, externalPort)) - valid = 
pokeExternalIpService(clientContainerName, protocol, externalAddress, externalPort, maxTries, nodesHostnames) - gomega.Expect(valid).To(gomega.Equal(true), "Validation failed for external address: %s", externalAddress) + valid = pokeExternalIpService(externalContainer, protocol, externalAddress, externalPort, maxTries, nodesHostnames) + gomega.Expect(valid).Should(gomega.BeTrue(), "Validation failed for external address: %s", externalAddress) } } }) }) ginkgo.Context("Validating ingress traffic to manually added node IPs", func() { + var externalContainer infraapi.ExternalContainer + ginkgo.BeforeEach(func() { endPoints = make([]*v1.Pod, 0) nodesHostnames = sets.NewString() @@ -1673,11 +1669,19 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { ginkgo.By("Creating an external container to send the traffic from") // the client uses the netexec command from the agnhost image, which is able to receive commands for poking other // addresses. - createClusterExternalContainer(clientContainerName, agnhostImage, []string{"--network", "kind", "-P"}, []string{"netexec", "--http-port=80"}) - - // If `kindexgw` exists, connect client container to it - runCommand(containerRuntime, "network", "connect", "kindexgw", clientContainerName) - + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer = infraapi.ExternalContainer{Name: "e2e-ingress-add-more", Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "external container %s must be created successfully", externalContainer.Name) + + // If `xgw` exists, connect client container to it + exGwNetwork, err := infraprovider.Get().GetNetwork("xgw") + if err == nil { + _, _ = providerCtx.AttachNetwork(exGwNetwork, externalContainer.Name) + } ginkgo.By("Adding ip addresses to each node") // add new secondary IP from node subnet to all nodes, if the cluster is v6 add an ipv6 address var newIP string @@ -1689,27 +1693,22 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { newIP = "172.18.1." 
+ strconv.Itoa(i+1) } // manually add the a secondary IP to each node - _, err := runCommand(containerRuntime, "exec", node.Name, "ip", "addr", "add", newIP, "dev", "breth0") + _, err := infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "addr", "add", newIP, "dev", deploymentconfig.Get().ExternalBridgeName()}) if err != nil { framework.Failf("failed to add new Addresses to node %s: %v", node.Name, err) } + providerCtx.AddCleanUpFn(func() error { + _, err := infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "addr", "del", newIP, "dev", deploymentconfig.Get().ExternalBridgeName()}) + if err != nil { + framework.Logf("failed to add new Addresses to node %s: %v", node.Name, err) + } + return nil + }) newNodeAddresses = append(newNodeAddresses, newIP) } }) - ginkgo.AfterEach(func() { - deleteClusterExternalContainer(clientContainerName) - - for i, node := range nodes.Items { - // delete the secondary IP previoulsy added to the nodes - _, err := runCommand(containerRuntime, "exec", node.Name, "ip", "addr", "delete", newNodeAddresses[i], "dev", "breth0") - if err != nil { - framework.Failf("failed to delete new Addresses to node %s: %v", node.Name, err) - } - } - }) - // This test validates ingress traffic to externalservices after a new node Ip is added. // It creates a service on both udp and tcp and assigns the new node IPs as // external Addresses. Then, creates a backend pod on each node. @@ -1741,7 +1740,7 @@ var _ = ginkgo.Describe("e2e ingress traffic validation", func() { ginkgo.By("Hitting the external service on " + externalAddress + " and reaching all the endpoints " + protocol) for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, externalAddress, externalPort, "hostname") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, externalAddress, externalPort, "hostname") responses.Insert(epHostname) // each endpoint returns its hostname. By doing this, we validate that each ep was reached at least once. @@ -1774,8 +1773,11 @@ var _ = ginkgo.Describe("e2e ingress to host-networked pods traffic validation", var nodesHostnames sets.String maxTries := 0 var nodes *v1.NodeList - var externalIpv4 string - var externalIpv6 string + var providerCtx infraapi.Context + + ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() + }) // This test validates ingress traffic to nodeports with externalTrafficPolicy Set to local. // It creates a nodeport service on both udp and tcp, and creates a host networked @@ -1786,6 +1788,8 @@ var _ = ginkgo.Describe("e2e ingress to host-networked pods traffic validation", // nodeport on the node with the backend actually receives traffic and that the packet is not // SNATed. ginkgo.Context("Validating ingress traffic to Host Networked pods with externalTrafficPolicy=local", func() { + var externalContainer infraapi.ExternalContainer + ginkgo.BeforeEach(func() { endPoints = make([]*v1.Pod, 0) nodesHostnames = sets.NewString() @@ -1827,11 +1831,16 @@ var _ = ginkgo.Describe("e2e ingress to host-networked pods traffic validation", ginkgo.By("Creating an external container to send the traffic from") // the client uses the netexec command from the agnhost image, which is able to receive commands for poking other // addresses. 
- externalIpv4, externalIpv6 = createClusterExternalContainer(clientContainerName, agnhostImage, []string{"--network", "kind", "-P"}, []string{"netexec", "--http-port=80"}) + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer = infraapi.ExternalContainer{Name: clientContainerName, Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "external container %s must be created successfully", externalContainer.Name) }) ginkgo.AfterEach(func() { - deleteClusterExternalContainer(clientContainerName) // f.Delete will delete the namespace and run WaitForNamespacesDeleted // This is inside the Context and will happen before the framework's teardown inside the Describe f.DeleteNamespace(context.TODO(), f.Namespace.Name) @@ -1863,9 +1872,9 @@ var _ = ginkgo.Describe("e2e ingress to host-networked pods traffic validation", expectedResponses := sets.NewString() if utilnet.IsIPv6String(nodeAddress.Address) { - expectedResponses.Insert(node.Name, externalIpv6) + expectedResponses.Insert(node.Name, externalContainer.GetIPv6()) } else { - expectedResponses.Insert(node.Name, externalIpv4) + expectedResponses.Insert(node.Name, externalContainer.GetIPv4()) } valid := false @@ -1876,8 +1885,8 @@ var _ = ginkgo.Describe("e2e ingress to host-networked pods traffic validation", ginkgo.By("Hitting the nodeport on " + node.Name + " and trying to reach only the local endpoint with protocol " + protocol) for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, nodeAddress.Address, nodePort, "hostname") - epClientIP := pokeEndpoint("", clientContainerName, protocol, nodeAddress.Address, nodePort, "clientip") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, nodeAddress.Address, nodePort, "hostname") + epClientIP := pokeEndpointViaExternalContainer(externalContainer, protocol, nodeAddress.Address, nodePort, "clientip") epClientIP, _, err = net.SplitHostPort(epClientIP) framework.ExpectNoError(err, "failed to parse client ip:port") responses.Insert(epHostname, epClientIP) @@ -1907,65 +1916,57 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { ipfix flowMonitoringProtocol = "ipfix" sflow flowMonitoringProtocol = "sflow" - svcname string = "netflow-test" - collectorContainer string = "netflow-collector" - ciNetworkName string = "kind" + svcname string = "netflow-test" + collectorContainerTemplate string = "netflow-collector%d" ) + getContainerName := func(port uint16) string { + return fmt.Sprintf(collectorContainerTemplate, port) + } + keywordInLogs := map[flowMonitoringProtocol]string{ netflow_v5: "NETFLOW_V5", ipfix: "IPFIX", sflow: "SFLOW_5"} f := wrappedTestFramework(svcname) - ginkgo.AfterEach(func() { - // tear down the collector container - if cid, _ := runCommand(containerRuntime, "ps", "-qaf", fmt.Sprintf("name=%s", collectorContainer)); cid != "" { - if _, err := runCommand(containerRuntime, "rm", "-f", collectorContainer); err != nil { - framework.Logf("failed to delete the collector test container %s %v", - collectorContainer, err) - } - } + var providerCtx infraapi.Context + + ginkgo.BeforeEach(func() { + providerCtx = 
infraprovider.Get().NewTestContext() }) ginkgo.DescribeTable("Should validate flow data of br-int is sent to an external gateway", func(protocol flowMonitoringProtocol, collectorPort uint16) { protocolStr := string(protocol) - ipField := "IPAddress" isIpv6 := IsIPv6Cluster(f.ClientSet) - if isIpv6 { - ipField = "GlobalIPv6Address" - } - ciNetworkFlag := fmt.Sprintf("{{ .NetworkSettings.Networks.kind.%s }}", ipField) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() ginkgo.By("Starting a flow collector container") - // start the collector container that will receive data - _, err := runCommand(containerRuntime, "run", "-itd", "--privileged", "--network", ciNetworkName, - "--name", collectorContainer, "cloudflare/goflow", "-kafka=false") + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + collectorExternalContainer := infraapi.ExternalContainer{Name: getContainerName(collectorPort), Image: "cloudflare/goflow", + Network: primaryProviderNetwork, Args: []string{"-kafka=false"}, ExtPort: collectorPort} + collectorExternalContainer, err = providerCtx.CreateExternalContainer(collectorExternalContainer) if err != nil { - framework.Failf("failed to start flow collector container %s: %v", collectorContainer, err) + framework.Failf("failed to start flow collector container %s: %v", getContainerName(collectorPort), err) } ovnEnvVar := fmt.Sprintf("OVN_%s_TARGETS", strings.ToUpper(protocolStr)) // retrieve the ip of the collector container - collectorIP, err := runCommand(containerRuntime, "inspect", "-f", ciNetworkFlag, collectorContainer) - if err != nil { - framework.Failf("could not retrieve IP address of collector container: %v", err) - } - // trim newline from the inspect output - collectorIP = strings.TrimSpace(collectorIP) - if net.ParseIP(collectorIP) == nil { - framework.Failf("Unable to retrieve a valid address from container %s with inspect output of %s", - collectorContainer, collectorIP) + collectorIP := collectorExternalContainer.GetIPv4() + if isIpv6 { + collectorIP = collectorExternalContainer.GetIPv6() } - addressAndPort := net.JoinHostPort(collectorIP, strconv.Itoa(int(collectorPort))) + + addressAndPort := net.JoinHostPort(collectorIP, strconv.Itoa(int(collectorExternalContainer.ExtPort))) ginkgo.By(fmt.Sprintf("Configuring ovnkube-node to use the new %s collector target", protocolStr)) setEnv := map[string]string{ovnEnvVar: addressAndPort} - setUnsetTemplateContainerEnv(f.ClientSet, ovnNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) + setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) ginkgo.By(fmt.Sprintf("Checking that the collector container received %s data", protocolStr)) keyword := keywordInLogs[protocol] collectorContainerLogsTest := func() wait.ConditionFunc { return func() (bool, error) { - collectorContainerLogs, err := runCommand(containerRuntime, "logs", collectorContainer) + collectorContainerLogs, err := infraprovider.Get().GetExternalContainerLogs(collectorExternalContainer) if err != nil { framework.Logf("failed to inspect logs in test container: %v", err) return false, nil @@ -1989,9 +1990,9 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { protocolStr, keyword)) ginkgo.By(fmt.Sprintf("Unsetting %s variable in ovnkube-node daemonset", ovnEnvVar)) - setUnsetTemplateContainerEnv(f.ClientSet, ovnNamespace, "daemonset/ovnkube-node", 
getNodeContainerName(), nil, ovnEnvVar) + setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), nil, ovnEnvVar) - ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "name=ovnkube-node", }) if err != nil { @@ -2002,7 +2003,7 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { execOptions := e2epod.ExecOptions{ Command: []string{"ovs-vsctl", "find", strings.ToLower(protocolStr)}, - Namespace: ovnNamespace, + Namespace: ovnKubeNamespace, PodName: ovnKubeNodePod.Name, ContainerName: getNodeContainerName(), CaptureStdout: true, @@ -2078,8 +2079,6 @@ var _ = ginkgo.Describe("e2e delete databases", func() { northDBFileName string = "ovnnb_db.db" southDBFileName string = "ovnsb_db.db" dirDB string = "/etc/ovn" - ovnWorkerNode string = "ovn-worker" - ovnWorkerNode2 string = "ovn-worker2" haModeMinDb int = 0 haModeMaxDb int = 2 ) @@ -2144,7 +2143,7 @@ var _ = ginkgo.Describe("e2e delete databases", func() { fileExistsOnPod := func(f *framework.Framework, namespace string, pod *v1.Pod, file string) bool { containerFlag := fmt.Sprintf("-c=%s", pod.Spec.Containers[0].Name) - _, err := e2ekubectl.RunKubectl(ovnNamespace, "exec", pod.Name, containerFlag, "--", "ls", file) + _, err := e2ekubectl.RunKubectl(namespace, "exec", pod.Name, containerFlag, "--", "ls", file) if err == nil { return true } @@ -2176,7 +2175,7 @@ var _ = ginkgo.Describe("e2e delete databases", func() { deleteFileFromPod := func(f *framework.Framework, namespace string, pod *v1.Pod, file string) { containerFlag := fmt.Sprintf("-c=%s", pod.Spec.Containers[0].Name) - e2ekubectl.RunKubectl(ovnNamespace, "exec", pod.Name, containerFlag, "--", "rm", file) + e2ekubectl.RunKubectl(namespace, "exec", pod.Name, containerFlag, "--", "rm", file) if fileExistsOnPod(f, namespace, pod, file) { framework.Failf("Error: failed to delete file %s", file) } @@ -2201,29 +2200,25 @@ var _ = ginkgo.Describe("e2e delete databases", func() { framework.ExpectNoError(<-errChan) } - twoPodsContinuousConnectivityTest := func(f *framework.Framework, - node1Name string, node2Name string, - syncChan chan string, errChan chan error) { + twoPodsContinuousConnectivityTest := func(f *framework.Framework, node1Name string, node2Name string, syncChan chan string, errChan chan error) { const ( pod1Name string = "connectivity-test-pod1" pod2Name string = "connectivity-test-pod2" - port string = "8080" + podPort uint16 = 8080 timeIntervalBetweenChecks time.Duration = 2 * time.Second ) - var ( - command = []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=" + port)} - ) - _, err := createGenericPod(f, pod1Name, node1Name, f.Namespace.Name, command) + _, err := createGenericPod(f, pod1Name, node1Name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(podPort)) framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) - _, err = createGenericPod(f, pod2Name, node2Name, f.Namespace.Name, command) + _, err = createGenericPod(f, pod2Name, node2Name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(podPort)) framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod2Name) pod2IP := getPodAddress(pod2Name, f.Namespace.Name) ginkgo.By("Checking initial connectivity from one pod to the other and verifying that the connection is achieved") - stdout, err := 
e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod1Name, "--", "curl", fmt.Sprintf("%s/hostname", net.JoinHostPort(pod2IP, port))) + stdout, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod1Name, "--", "curl", fmt.Sprintf("%s/hostname", + net.JoinHostPort(pod2IP, fmt.Sprintf("%d", podPort)))) if err != nil || stdout != pod2Name { errChan <- fmt.Errorf("Error: attempted connection to pod %s found err: %v", pod2Name, err) @@ -2238,7 +2233,8 @@ var _ = ginkgo.Describe("e2e delete databases", func() { framework.Logf(msg + ": finish connectivity test.") break L default: - stdout, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod1Name, "--", "curl", fmt.Sprintf("%s/hostname", net.JoinHostPort(pod2IP, port))) + stdout, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod1Name, "--", "curl", fmt.Sprintf("%s/hostname", + net.JoinHostPort(pod2IP, fmt.Sprintf("%d", podPort)))) if err != nil || stdout != pod2Name { errChan <- err framework.Failf("Error: attempted connection to pod %s found err: %v", pod2Name, err) @@ -2265,14 +2261,20 @@ var _ = ginkgo.Describe("e2e delete databases", func() { DBFileNamesToDelete[i] = path.Join(dirDB, file) } + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err) + if len(nodes.Items) < 2 { + ginkgo.Skip("Test requires >= 2 Ready nodes, but there are only %v nodes", len(nodes.Items)) + } framework.Logf("connectivity test before deleting db files") framework.Logf("test simple connectivity from new pod to API server, before deleting db files") singlePodConnectivityTest(f, "before-delete-db-files") framework.Logf("setup two pods for continuous connectivity test") syncChan, errChan := make(chan string), make(chan error) + node1Name, node2Name := nodes.Items[0].GetName(), nodes.Items[1].GetName() go func() { defer ginkgo.GinkgoRecover() - twoPodsContinuousConnectivityTest(f, ovnWorkerNode, ovnWorkerNode2, syncChan, errChan) + twoPodsContinuousConnectivityTest(f, node1Name, node2Name, syncChan, errChan) }() select { @@ -2287,22 +2289,23 @@ var _ = ginkgo.Describe("e2e delete databases", func() { // Start the db disruption - delete the db files and delete the db-pod in order to emulate the cluster/pod restart // Retrieve the DB pod - dbPod, err := f.ClientSet.CoreV1().Pods(ovnNamespace).Get(context.Background(), db_pod_name, metav1.GetOptions{}) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + dbPod, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).Get(context.Background(), db_pod_name, metav1.GetOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to get pod: %s, err: %v", db_pod_name, err)) // Check that all files are on the db pod framework.Logf("make sure that all the db files are on db pod %s", dbPod.Name) - if !allFilesExistOnPod(f, ovnNamespace, dbPod, allDBFiles) { + if !allFilesExistOnPod(f, ovnKubeNamespace, dbPod, allDBFiles) { framework.Failf("Error: db files not found") } // Delete the db files from the db-pod framework.Logf("deleting db files from db pod") for _, db_file := range DBFileNamesToDelete { - deleteFileFromPod(f, ovnNamespace, dbPod, db_file) + deleteFileFromPod(f, ovnKubeNamespace, dbPod, db_file) } // Delete the db-pod in order to emulate the cluster/pod restart framework.Logf("deleting db pod %s", dbPod.Name) - deletePod(f, ovnNamespace, dbPod.Name) + deletePod(f, ovnKubeNamespace, dbPod.Name) framework.Logf("wait for db pod to finish full restart") waitForPodToFinishFullRestart(f, dbPod) @@ -2310,7 +2313,7 @@ var _ = 
ginkgo.Describe("e2e delete databases", func() { // Check db files existence // Check that all files are on pod framework.Logf("make sure that all the db files are on db pod %s", dbPod.Name) - if !allFilesExistOnPod(f, ovnNamespace, dbPod, allDBFiles) { + if !allFilesExistOnPod(f, ovnKubeNamespace, dbPod, allDBFiles) { framework.Failf("Error: db files not found") } @@ -2346,8 +2349,8 @@ var _ = ginkgo.Describe("e2e delete databases", func() { "No separate db pods in muliple zones interconnect deployment", ) } - - dbDeployment := getDeployment(f, ovnNamespace, "ovnkube-db") + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + dbDeployment := getDeployment(f, ovnKubeNamespace, "ovnkube-db") dbPods, err := e2edeployment.GetPodsForDeployment(context.TODO(), f.ClientSet, dbDeployment) if err != nil { framework.Failf("Error: Failed to get pods, err: %v", err) @@ -2366,7 +2369,7 @@ var _ = ginkgo.Describe("e2e delete databases", func() { framework.Logf("deleting db pod: %v", dbPodName) // Delete the db-pod in order to emulate the pod restart dbPod.Status.Message = "check" - deletePod(f, ovnNamespace, dbPodName) + deletePod(f, ovnKubeNamespace, dbPodName) } framework.Logf("wait for all the Deployment to become ready again after pod deletion") @@ -2380,7 +2383,8 @@ var _ = ginkgo.Describe("e2e delete databases", func() { }) ginkgo.It("Should validate connectivity before and after deleting all the db-pods at once in HA mode", func() { - dbPods, err := e2epod.GetPods(context.TODO(), f.ClientSet, ovnNamespace, map[string]string{"name": databasePodPrefix}) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + dbPods, err := e2epod.GetPods(context.TODO(), f.ClientSet, ovnKubeNamespace, map[string]string{"name": databasePodPrefix}) if err != nil { framework.Failf("Error: Failed to get pods, err: %v", err) } @@ -2397,7 +2401,7 @@ var _ = ginkgo.Describe("e2e delete databases", func() { framework.Logf("deleting db pod: %v", dbPodName) // Delete the db-pod in order to emulate the pod restart dbPod.Status.Message = "check" - deletePod(f, ovnNamespace, dbPodName) + deletePod(f, ovnKubeNamespace, dbPodName) } framework.Logf("wait for all the pods to finish full restart") diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 9eecb3f1e2..0359b3461b 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -9,8 +9,12 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" "github.com/ovn-org/ovn-kubernetes/test/e2e/diagnostics" - + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" "k8s.io/klog" "k8s.io/kubernetes/test/e2e/framework" e2econfig "k8s.io/kubernetes/test/e2e/framework/config" @@ -30,6 +34,23 @@ func handleFlags() { flag.Parse() } +var _ = ginkgo.BeforeSuite(func() { + // Make sure the framework's kubeconfig is set. 
+ gomega.Expect(framework.TestContext.KubeConfig).NotTo(gomega.Equal(""), fmt.Sprintf("%s env var not set", clientcmd.RecommendedConfigPathEnvVar)) + + _, err := framework.LoadClientset() + framework.ExpectNoError(err) + config, err := framework.LoadConfig() + framework.ExpectNoError(err) + err = infraprovider.Set(config) + framework.ExpectNoError(err, "must configure infrastructure provider") + deploymentconfig.Set() + client, err := clientset.NewForConfig(config) + framework.ExpectNoError(err, "k8 clientset is required to list nodes") + err = ipalloc.InitPrimaryIPAllocator(client.CoreV1().Nodes()) + framework.ExpectNoError(err, "failed to initialize node primary IP allocator") +}) + // required due to go1.13 issue: https://github.com/onsi/ginkgo/issues/602 func TestMain(m *testing.M) { // Register test flags, then parse flags. @@ -41,7 +62,8 @@ func TestMain(m *testing.M) { } os.Exit(0) } - + // reset provider to skeleton as Kubernetes test framework expects a supported provider + framework.TestContext.Provider = "skeleton" framework.AfterReadingAllFlags(&framework.TestContext) // TODO: Deprecating repo-root over time... instead just use gobindata_util.go , see #23987. diff --git a/test/e2e/egress_firewall.go b/test/e2e/egress_firewall.go index 529f3dc034..e5a3f8518a 100644 --- a/test/e2e/egress_firewall.go +++ b/test/e2e/egress_firewall.go @@ -11,6 +11,11 @@ import ( "strings" "time" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/onsi/ginkgo/extensions/table" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -108,46 +113,51 @@ var _ = ginkgo.Describe("e2e egress firewall policy validation", func() { ginkgo.Context("with external containers", func() { const ( - ciNetworkName = "kind" externalContainerName1 = "e2e-egress-fw-external-container1" externalContainerName2 = "e2e-egress-fw-external-container2" - externalContainerPort1 = 1111 - externalContainerPort2 = 2222 ) var ( - singleIPMask, subnetMask string - externalContainer1IP, externalContainer2IP string + singleIPMask, subnetMask string + providerCtx infraapi.Context + externalContainer1 infraapi.ExternalContainer + externalContainer2 infraapi.ExternalContainer ) - checkConnectivity := func(srcPodName, dstIP string, dstPort int, shouldSucceed bool) { + getExternalContainerIP := func(externalContainer infraapi.ExternalContainer) string { + if IsIPv6Cluster(f.ClientSet) { + return externalContainer.GetIPv6() + } + return externalContainer.GetIPv4() + } + + checkConnectivity := func(srcPodName, dstIP string, dstPort string, shouldSucceed bool) { testContainer := fmt.Sprintf("%s-container", srcPodName) testContainerFlag := fmt.Sprintf("--container=%s", testContainer) if shouldSucceed { gomega.Eventually(func() bool { _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, fmt.Sprint(dstPort))) + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, dstPort)) return err == nil }, time.Duration(2*testTimeout)*time.Second).Should(gomega.BeTrue(), - fmt.Sprintf("expected connection from %s to [%s]:%d to suceed", srcPodName, dstIP, dstPort)) + fmt.Sprintf("expected connection from %s to [%s]:%s to suceed", srcPodName, dstIP, dstPort)) } else { 
gomega.Consistently(func() bool { _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, fmt.Sprint(dstPort))) + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, dstPort)) return err != nil }, time.Duration(2*testTimeout)*time.Second).Should(gomega.BeTrue(), - fmt.Sprintf("expected connection from %s to [%s]:%d to fail", srcPodName, dstIP, dstPort)) + fmt.Sprintf("expected connection from %s to [%s]:%s to fail", srcPodName, dstIP, dstPort)) } } - checkExternalContainerConnectivity := func(containerName, dstIP string, dstPort int) { - cmd := []string{"docker", "exec", containerName, - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, fmt.Sprint(dstPort))} - framework.Logf("Running command %v", cmd) - _, err := runCommand(cmd...) + checkExternalContainerConnectivity := func(externalContainer infraapi.ExternalContainer, dstIP string, dstPort int) { + _, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{ + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(dstIP, fmt.Sprint(dstPort)), + }) if err != nil { framework.Failf("Failed to connect from external container %s to %s:%d: %v", - containerName, dstIP, dstPort, err) + externalContainer.GetName(), dstIP, dstPort, err) } } @@ -157,7 +167,7 @@ var _ = ginkgo.Describe("e2e egress firewall policy validation", func() { // We don't know what causes that behaviour, so given function is a workaround for this issue. // It also only historically fails for the first ef test "Should validate the egress firewall policy functionality for allowed IP", // so only used there for now. 
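The connectivity helpers in this hunk switch the destination port from an int to a string because the curl target is now built directly with net.JoinHostPort, which takes string arguments and adds the brackets that IPv6 literals need. A small standalone illustration follows; the addresses and port are arbitrary examples, not values from the test environment.

// Why the helpers carry the port as a string: net.JoinHostPort(host, port)
// expects strings and brackets IPv6 hosts automatically.
package main

import (
	"fmt"
	"net"
	"strconv"
)

func main() {
	port := uint16(1111)
	for _, host := range []string{"172.18.0.5", "fc00:f853:ccd:e793::5"} {
		target := net.JoinHostPort(host, strconv.Itoa(int(port)))
		fmt.Println(target) // prints 172.18.0.5:1111 and [fc00:f853:ccd:e793::5]:1111
	}
}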
- createSrcPodWithRetry := func(retries int, reachableDst string, reachablePort int, + createSrcPodWithRetry := func(retries int, reachableDst string, reachablePort string, podName, nodeName string, ipCheckInterval, ipCheckTimeout time.Duration, f *framework.Framework) { for i := 0; i < retries; i++ { createSrcPod(podName, nodeName, ipCheckInterval, ipCheckTimeout, f) @@ -165,7 +175,7 @@ var _ = ginkgo.Describe("e2e egress firewall policy validation", func() { testContainerFlag := fmt.Sprintf("--container=%s", testContainer) for connectRetry := 0; connectRetry < 5; connectRetry++ { _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", podName, testContainerFlag, "--", - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(reachableDst, fmt.Sprint(reachablePort))) + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(reachableDst, reachablePort)) if err == nil { return } @@ -173,41 +183,51 @@ var _ = ginkgo.Describe("e2e egress firewall policy validation", func() { err := deletePodWithWaitByName(context.TODO(), f.ClientSet, podName, f.Namespace.Name) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } - framework.Failf("Failed to create pod %s that can reach %s:%d after %d retries", podName, reachableDst, reachablePort, retries) + framework.Failf("Failed to create pod %s that can reach %s:%s after %d retries", podName, reachableDst, reachablePort, retries) } ginkgo.BeforeEach(func() { - externalContainer1IPV4, externalContainer1IPV6 := createClusterExternalContainer(externalContainerName1, agnhostImage, - []string{"--network", ciNetworkName, "-p", fmt.Sprintf("%d:%d", externalContainerPort1, externalContainerPort1)}, - []string{"netexec", fmt.Sprintf("--http-port=%d", externalContainerPort1)}) - - externalContainer2IPV4, externalContainer2IPV6 := createClusterExternalContainer(externalContainerName2, agnhostImage, - []string{"--network", ciNetworkName, "-p", fmt.Sprintf("%d:%d", externalContainerPort2, externalContainerPort2)}, - []string{"netexec", fmt.Sprintf("--http-port=%d", externalContainerPort2)}) - - if IsIPv6Cluster(f.ClientSet) { - externalContainer1IP = externalContainer1IPV6 - externalContainer2IP = externalContainer2IPV6 - } else { - externalContainer1IP = externalContainer1IPV4 - externalContainer2IP = externalContainer2IPV4 + providerCtx = infraprovider.Get().NewTestContext() + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + // container 1 + externalContainer1Port := infraprovider.Get().GetExternalContainerPort() + framework.ExpectNoError(err, "must get unique container port for external container 1") + externalContainer1Spec := infraapi.ExternalContainer{ + Name: externalContainerName1, + Image: images.AgnHost(), + Network: primaryProviderNetwork, + Args: []string{"netexec", fmt.Sprintf("--http-port=%d", externalContainer1Port)}, + ExtPort: externalContainer1Port, } + externalContainer1, err = providerCtx.CreateExternalContainer(externalContainer1Spec) + framework.ExpectNoError(err, "must create external container 1") + + // container 2 + externalContainer2Port := infraprovider.Get().GetExternalContainerPort() + framework.ExpectNoError(err, "must get unique container port for external container 2") + externalContainer2Spec := infraapi.ExternalContainer{ + Name: externalContainerName2, + Image: images.AgnHost(), + Network: primaryProviderNetwork, + Args: []string{"netexec", fmt.Sprintf("--http-port=%d", externalContainer2Port)}, + ExtPort: externalContainer2Port, + } + externalContainer2, err = 
providerCtx.CreateExternalContainer(externalContainer2Spec) + framework.ExpectNoError(err, "must create external container 2") gomega.Eventually(func() bool { - cmd := []string{"docker", "exec", externalContainerName1, - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(externalContainer2IP, fmt.Sprint(externalContainerPort2))} - framework.Logf("Running command %v", cmd) - _, err := runCommand(cmd...) + _, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer1, []string{ + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(getExternalContainerIP(externalContainer2), fmt.Sprint(externalContainer2.GetPortStr())), + }) if err != nil { - framework.Logf("Failed: %v", err) + framework.Logf("Failed to connect to container 2 from container 1: %v", err) return false } - cmd = []string{"docker", "exec", externalContainerName2, - "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(externalContainer1IP, fmt.Sprint(externalContainerPort1))} - framework.Logf("Running command %v", cmd) - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{ + "curl", "-s", "--connect-timeout", fmt.Sprint(testTimeout), net.JoinHostPort(getExternalContainerIP(externalContainer1), fmt.Sprint(externalContainer1.GetPortStr())), + }) if err != nil { - framework.Logf("Failed: %v", err) + framework.Logf("Failed to connect to container 1 from container 2: %v", err) return false } return true @@ -221,16 +241,10 @@ var _ = ginkgo.Describe("e2e egress firewall policy validation", func() { } }) - ginkgo.AfterEach(func() { - deleteClusterExternalContainer(externalContainerName1) - deleteClusterExternalContainer(externalContainerName2) - }) - ginkgo.It("Should validate the egress firewall policy functionality for allowed IP", func() { srcPodName := "e2e-egress-fw-src-pod" - // create the pod that will be used as the source for the connectivity test - createSrcPodWithRetry(3, externalContainer1IP, externalContainerPort1, + createSrcPodWithRetry(3, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), srcPodName, serverNodeInfo.name, retryInterval, retryTimeout, f) // egress firewall crd yaml configuration @@ -247,18 +261,18 @@ spec: - type: Deny to: cidrSelector: %s -`, f.Namespace.Name, externalContainer1IP, singleIPMask, denyAllCIDR) +`, f.Namespace.Name, getExternalContainerIP(externalContainer1), singleIPMask, denyAllCIDR) applyEF(egressFirewallConfig, f.Namespace.Name) // Verify the remote host/port as explicitly allowed by the firewall policy is reachable ginkgo.By(fmt.Sprintf("Verifying connectivity to an explicitly allowed host %s is permitted as defined "+ - "by the external firewall policy", externalContainer1IP)) - checkConnectivity(srcPodName, externalContainer1IP, externalContainerPort1, true) + "by the external firewall policy", getExternalContainerIP(externalContainer1))) + checkConnectivity(srcPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), true) // Verify the remote host/port as implicitly denied by the firewall policy is not reachable ginkgo.By(fmt.Sprintf("Verifying connectivity to an implicitly denied host %s is not permitted as defined "+ - "by the external firewall policy", externalContainer2IP)) - checkConnectivity(srcPodName, externalContainer2IP, externalContainerPort2, false) + "by the external firewall policy", getExternalContainerIP(externalContainer2))) + checkConnectivity(srcPodName, 
getExternalContainerIP(externalContainer2), externalContainer2.GetPortStr(), false) }) ginkgo.It("Should validate the egress firewall policy functionality for allowed CIDR and port", func() { @@ -276,11 +290,11 @@ spec: cidrSelector: %s/%s ports: - protocol: TCP - port: %d + port: %s - type: Deny to: cidrSelector: %s -`, f.Namespace.Name, externalContainer1IP, subnetMask, externalContainerPort1, denyAllCIDR) +`, f.Namespace.Name, getExternalContainerIP(externalContainer1), subnetMask, externalContainer1.GetPortStr(), denyAllCIDR) applyEF(egressFirewallConfig, f.Namespace.Name) // create the pod that will be used as the source for the connectivity test @@ -288,13 +302,13 @@ spec: // Verify the remote host/port as explicitly allowed by the firewall policy is reachable ginkgo.By(fmt.Sprintf("Verifying connectivity to an explicitly allowed port on host %s is permitted as "+ - "defined by the external firewall policy", externalContainer1IP)) - checkConnectivity(srcPodName, externalContainer1IP, externalContainerPort1, true) + "defined by the external firewall policy", getExternalContainerIP(externalContainer1))) + checkConnectivity(srcPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), true) // Verify the remote host/port as implicitly denied by the firewall policy is not reachable ginkgo.By(fmt.Sprintf("Verifying connectivity to an implicitly denied port on host %s is not permitted as "+ - "defined by the external firewall policy", externalContainer2IP)) - checkConnectivity(srcPodName, externalContainer2IP, externalContainerPort2, false) + "defined by the external firewall policy", getExternalContainerIP(externalContainer2))) + checkConnectivity(srcPodName, getExternalContainerIP(externalContainer2), externalContainer2.GetPortStr(), false) }) ginkgo.It("Should validate the egress firewall allows inbound connections", func() { @@ -304,9 +318,9 @@ spec: // 4. Check only inbound traffic is allowed efPodName := "e2e-egress-fw-pod" - efPodPort := 1234 + var efPodPort uint16 = 1234 serviceName := "service-for-pods" - servicePort := 31234 + var servicePort uint16 = 31234 ginkgo.By("Creating the egress firewall pod") // 1. create nodePort service and external container @@ -318,7 +332,7 @@ spec: } ginkgo.By("Creating the nodePort service") - _, err = createServiceForPodsWithLabel(f, f.Namespace.Name, int32(servicePort), strconv.Itoa(efPodPort), "NodePort", endpointsSelector) + _, err = createServiceForPodsWithLabel(f, f.Namespace.Name, servicePort, efPodPort, "NodePort", endpointsSelector) framework.ExpectNoError(err, fmt.Sprintf("unable to create nodePort service, err: %v", err)) ginkgo.By("Waiting for the endpoints to pop up") @@ -327,9 +341,9 @@ spec: // 2. 
Check connectivity works both ways // pod -> external container should work - ginkgo.By(fmt.Sprintf("Verifying connectivity from pod %s to external container [%s]:%d", - efPodName, externalContainer1IP, externalContainerPort1)) - checkConnectivity(efPodName, externalContainer1IP, externalContainerPort1, true) + ginkgo.By(fmt.Sprintf("Verifying connectivity from pod %s to external container [%s]:%s", + efPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr())) + checkConnectivity(efPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), true) // external container -> nodePort svc should work svc, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Get(context.TODO(), serviceName, metav1.GetOptions{}) @@ -337,8 +351,8 @@ spec: nodeIP := serverNodeInfo.nodeIP ginkgo.By(fmt.Sprintf("Verifying connectivity from external container %s to nodePort svc [%s]:%d", - externalContainer1IP, nodeIP, svc.Spec.Ports[0].NodePort)) - checkExternalContainerConnectivity(externalContainerName1, nodeIP, int(svc.Spec.Ports[0].NodePort)) + getExternalContainerIP(externalContainer1), nodeIP, svc.Spec.Ports[0].NodePort)) + checkExternalContainerConnectivity(externalContainer1, nodeIP, int(svc.Spec.Ports[0].NodePort)) // 3. Apply deny-all egress firewall and wait for it to be applied var egressFirewallConfig = fmt.Sprintf(`kind: EgressFirewall @@ -356,20 +370,20 @@ spec: // 4. Check that only inbound traffic is allowed // pod -> external container should be blocked - ginkgo.By(fmt.Sprintf("Verifying connectivity from pod %s to external container [%s]:%d is blocked", - efPodName, externalContainer1IP, externalContainerPort1)) - checkConnectivity(efPodName, externalContainer1IP, externalContainerPort1, false) + ginkgo.By(fmt.Sprintf("Verifying connectivity from pod %s to external container [%s]:%s is blocked", + efPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr())) + checkConnectivity(efPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), false) // external container -> nodePort svc should work ginkgo.By(fmt.Sprintf("Verifying connectivity from external container %s to nodePort svc [%s]:%d", - externalContainer1IP, nodeIP, svc.Spec.Ports[0].NodePort)) - checkExternalContainerConnectivity(externalContainerName1, nodeIP, int(svc.Spec.Ports[0].NodePort)) + getExternalContainerIP(externalContainer1), nodeIP, svc.Spec.Ports[0].NodePort)) + checkExternalContainerConnectivity(externalContainer1, nodeIP, int(svc.Spec.Ports[0].NodePort)) }) ginkgo.It("Should validate the egress firewall doesn't affect internal connections", func() { srcPodName := "e2e-egress-fw-src-pod" dstPodName := "e2e-egress-fw-dst-pod" - dstPort := 1234 + dstPort := "1234" // egress firewall crd yaml configuration var egressFirewallConfig = fmt.Sprintf(`kind: EgressFirewall apiVersion: k8s.ovn.org/v1 @@ -389,7 +403,7 @@ spec: // create dst pod dstPod, err := createPod(f, dstPodName, serverNodeInfo.name, f.Namespace.Name, - []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%d", dstPort)}, nil) + []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", dstPort)}, nil) if err != nil { framework.Failf("Failed to create dst pod %s: %v", dstPodName, err) } @@ -399,8 +413,8 @@ spec: checkConnectivity(srcPodName, dstPodIP, dstPort, true) ginkgo.By(fmt.Sprintf("Verifying connectivity to an external host %s is not permitted as defined "+ - "by the external firewall policy", externalContainer1IP)) - 
checkConnectivity(srcPodName, externalContainer1IP, externalContainerPort1, false) + "by the external firewall policy", getExternalContainerIP(externalContainer1))) + checkConnectivity(srcPodName, getExternalContainerIP(externalContainer1), externalContainer1.GetPortStr(), false) }) ginkgo.It("Should validate that egressfirewall supports DNS name in caps", func() { @@ -575,25 +589,19 @@ spec: for _, ip := range ipFamilies { // manually add the a secondary IP to each node framework.Logf("Adding IP %s to node %s", ip, nodeName) - _, err = runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "add", ip, "dev", "breth0") + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{ + "ip", "addr", "add", ip, "dev", deploymentconfig.Get().PrimaryInterfaceName(), + }) if err != nil && !strings.Contains(err.Error(), "Address already assigned") { framework.Failf("failed to add new IP address %s to node %s: %v", ip, nodeName, err) } + ginkgo.DeferCleanup(func() error { + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "addr", "delete", ip, "dev", deploymentconfig.Get().PrimaryInterfaceName()}) + return err + }) toCurlSecondaryNodeIPAddresses.Insert(ip) } } - defer func() { - for nodeName, ipFamilies := range node2ndaryIPs { - for _, ip := range ipFamilies { - // manually add the a secondary IP to each node - framework.Logf("Deleting IP %s from node %s", ip, nodeName) - _, err = runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "del", ip, "dev", "breth0") - if err != nil { - framework.Logf("failed to delete secondary ip from the node %s: %v", nodeName, err) - } - } - } - }() ginkgo.By("Should NOT be able to reach each host networked pod via node selector") hostNetworkPortStr := fmt.Sprint(hostNetworkPort) diff --git a/test/e2e/egress_services.go b/test/e2e/egress_services.go index 6d5120b8c9..eb9cb38942 100644 --- a/test/e2e/egress_services.go +++ b/test/e2e/egress_services.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "os" + "strconv" "strings" "sync" "time" @@ -13,9 +14,16 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" + "golang.org/x/sync/errgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" @@ -26,26 +34,41 @@ import ( var _ = ginkgo.Describe("EgressService", func() { const ( - egressServiceYAML = "egress_service.yaml" - externalKindContainerName = "kind-external-container-for-egress-service" - podHTTPPort = 8080 - serviceName = "test-egress-service" - blackholeRoutingTable = "100" + egressServiceYAML = "egress_service.yaml" + externalContainerName = "external-container-for-egress-service" + podHTTPPort = "8080" + serviceName = "test-egress-service" + blackholeRoutingTable = "100" ) - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)} - pods := []string{"pod1", "pod2", "pod3"} - podsLabels := map[string]string{"egress": "please"} - var ( - externalKindIPv4 string - externalKindIPv6 string - nodes []v1.Node + command = []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} + pods = 
[]string{"pod1", "pod2", "pod3"} + podsLabels = map[string]string{"egress": "please"} + nodes []v1.Node + externalContainer infraapi.ExternalContainer + f = wrappedTestFramework("egress-services") + providerCtx infraapi.Context ) - f := wrappedTestFramework("egress-services") + skipIfProtoNotAvailableFn := func(protocol v1.IPFamily, container infraapi.ExternalContainer) { + if protocol == v1.IPv4Protocol && !container.IsIPv4() { + ginkgo.Skip("skipped because external container does not have an IPv4 address") + } + if protocol == v1.IPv6Protocol && !container.IsIPv6() { + ginkgo.Skip("skipped because external container does not have an IPv6 address") + } + // FIXME(mk): consider dualstack clusters + if IsIPv6Cluster(f.ClientSet) && protocol == v1.IPv4Protocol { + ginkgo.Skip("skipped because cluster is IPv6") + } + if !IsIPv6Cluster(f.ClientSet) && protocol == v1.IPv6Protocol { + ginkgo.Skip("skipped because cluster is IPv4") + } + } ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() var err error clientSet := f.ClientSet n, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), clientSet, 3) @@ -56,19 +79,23 @@ var _ = ginkgo.Describe("EgressService", func() { len(n.Items)) } nodes = n.Items - ginkgo.By("Creating the external kind container to send the traffic to/from") - externalKindIPv4, externalKindIPv6 = createClusterExternalContainer(externalKindContainerName, agnhostImage, - []string{"--privileged", "--network", "kind"}, []string{"netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)}) - - }) - - ginkgo.AfterEach(func() { - deleteClusterExternalContainer(externalKindContainerName) - flushCustomRoutingTablesOnNodes(nodes, blackholeRoutingTable) + ginkgo.By("Creating the external component to send the traffic to/from") + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary provider network") + externalContainer = infraapi.ExternalContainer{Name: externalContainerName, Image: images.AgnHost(), + Network: primaryProviderNetwork, ExtPort: 8080, + Args: getAgnHostHTTPPortBindCMDArgs(8080)} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to create external container") }) ginkgo.DescribeTable("Should validate pods' egress is SNATed to the LB's ingress ip without selectors", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) + dstIP := externalContainer.GetIPv4() + if protocol == v1.IPv6Protocol { + dstIP = externalContainer.GetIPv6() + } ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} for i, name := range pods { @@ -113,18 +140,18 @@ spec: _, egressHostV4IP, egressHostV6IP := getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) ginkgo.By("Setting the static route on the external container for the service via the egress host ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) ginkgo.By("Verifying the pods reach the external container with the service's ingress ip") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) }, 5*time.Second, 
500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -133,10 +160,10 @@ spec: ginkgo.By("Verifying the external container can reach all of the service's backend pods") // This is to be sure we did not break ingress traffic for the service - reachAllServiceBackendsFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort, pods) + reachAllServiceBackendsFromExternalContainer(externalContainer, svcIP, podHTTPPort, pods) ginkgo.By("Creating the custom network") - setBlackholeRoutingTableOnNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + setBlackholeRoutingTableOnNodes(providerCtx, nodes, externalContainer, blackholeRoutingTable, protocol == v1.IPv4Protocol) ginkgo.By("Updating the resource to contain a Network") egressServiceConfig = fmt.Sprintf(` @@ -157,7 +184,7 @@ spec: ginkgo.By("Verifying the pods can't reach the external container due to the blackhole in the custom network") gomega.Consistently(func() error { for _, pod := range pods { - err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) if err != nil && !strings.Contains(err.Error(), "exit code 28") { return fmt.Errorf("expected err to be a connection timed out due to blackhole, got: %w", err) } @@ -170,10 +197,10 @@ spec: }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "managed to reach external container despite blackhole") ginkgo.By("Removing the blackhole to the external container the pods should be able to reach it with the loadbalancer's ingress ip") - delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalContainer.GetIPv4(), externalContainer.GetIPv6(), protocol == v1.IPv4Protocol) gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -192,20 +219,25 @@ spec: } gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") } }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", 
v1.IPv4Protocol), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), ) ginkgo.DescribeTable("[LGW] Should validate pods' egress uses node's IP when setting Network without SNAT", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) + dstIP := externalContainer.GetIPv4() + if protocol == v1.IPv6Protocol { + dstIP = externalContainer.GetIPv6() + } ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} for i, name := range pods { @@ -248,12 +280,12 @@ spec: svcIP := svc.Status.LoadBalancer.Ingress[0].IP ginkgo.By("Creating the custom network") - setBlackholeRoutingTableOnNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + setBlackholeRoutingTableOnNodes(providerCtx, nodes, externalContainer, blackholeRoutingTable, protocol == v1.IPv4Protocol) ginkgo.By("Verifying the pods can't reach the external container due to the blackhole in the custom network") gomega.Consistently(func() error { for _, pod := range pods { - err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) if err != nil && !strings.Contains(err.Error(), "exit code 28") { return fmt.Errorf("expected err to be a connection timed out due to blackhole, got: %w", err) } @@ -266,7 +298,7 @@ spec: }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "managed to reach external container despite blackhole") ginkgo.By("Removing the blackhole to the external container the pods should be able to reach it with the node's IP") - delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalContainer.GetIPv4(), externalContainer.GetIPv6(), protocol == v1.IPv4Protocol) for i, pod := range pods { node := &nodes[i] v4, v6 := getNodeAddresses(node) @@ -276,22 +308,22 @@ spec: } gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") } // Re-adding the blackhole and deleting the EgressService to verify that the pods go back to use the main network. 
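For orientation, the blackhole used by these tests boils down to per-node iproute2 state in a custom routing table (blackholeRoutingTable "100"), which is flushed again when the test is done. The sketch below only shows the kind of commands such a helper could issue; it is an assumption about the shape of setBlackholeRoutingTableOnNodes and the flush helper, not the repo's actual implementation, and the external IP is a placeholder.

// Sketch of blackholing traffic to the external container in table 100 and
// flushing the table afterwards; in the e2e tests commands like these are
// executed on each node via ExecK8NodeCommand.
package main

import "fmt"

func main() {
	const (
		table      = "100"          // matches blackholeRoutingTable in the test
		externalIP = "172.18.0.100" // placeholder for the external container IP
	)
	cmds := [][]string{
		{"ip", "route", "add", "blackhole", externalIP, "table", table},
		// teardown, roughly what flushing the custom table undoes:
		{"ip", "route", "flush", "table", table},
	}
	for _, c := range cmds {
		fmt.Println(c)
	}
}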
ginkgo.By("Re-adding the blackhole the pods should not be able to reach the external container") - setBlackholeRoutingTableOnNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + setBlackholeRoutingTableOnNodes(providerCtx, nodes, externalContainer, blackholeRoutingTable, protocol == v1.IPv4Protocol) ginkgo.By("Verifying the pods can't reach the external container due to the blackhole in the custom network") gomega.Consistently(func() error { for _, pod := range pods { - err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) if err != nil && !strings.Contains(err.Error(), "exit code 28") { return fmt.Errorf("expected err to be a connection timed out due to blackhole, got: %w", err) } @@ -315,20 +347,21 @@ spec: } gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") } }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", v1.IPv4Protocol), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), ) ginkgo.DescribeTable("Should validate the egress SVC SNAT functionality against host-networked pods", func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} podsToNodeMapping := make(map[string]v1.Node, 3) @@ -381,25 +414,26 @@ spec: } } ginkgo.By("By setting a secondary IP on non-egress node acting as \"another node\"") - var otherDstIP string + var otherDstIP net.IP if protocol == v1.IPv6Protocol { - otherDstIP = "fc00:f853:ccd:e793:ffff::1" + otherDstIP, err = ipalloc.NewPrimaryIPv6() } else { - // TODO(mk): replace with non-repeating IP allocator - otherDstIP = "172.18.1.1" + otherDstIP, err = ipalloc.NewPrimaryIPv4() } - _, err = runCommand(containerRuntime, "exec", dstNode.Name, "ip", "addr", "add", otherDstIP, "dev", "breth0") + framework.ExpectNoError(err, "failed to allocate secondary node IP") + otherDst := otherDstIP.String() + ginkgo.By(fmt.Sprintf("adding secondary IP %q to node %s", otherDst, dstNode.Name)) + extBridgeName := deploymentconfig.Get().ExternalBridgeName() + _, err = infraprovider.Get().ExecK8NodeCommand(dstNode.Name, []string{"ip", "addr", "add", otherDst, "dev", extBridgeName}) if err != nil { framework.Failf("failed to add address to node %s: %v", dstNode.Name, err) } - defer func() { - _, err = runCommand(containerRuntime, "exec", dstNode.Name, "ip", "addr", "delete", otherDstIP, "dev", "breth0") - if err != nil { - framework.Failf("failed to remove address from node %s: %v", dstNode.Name, err) - } - }() - ginkgo.By("Creating host-networked pod on non-egress node acting as \"another node\"") - _, err = createPod(f, hostNetPod, dstNode.Name, 
f.Namespace.Name, []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)}, map[string]string{}, func(p *v1.Pod) { + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecK8NodeCommand(dstNode.Name, []string{"ip", "addr", "delete", otherDst, "dev", extBridgeName}) + return err + }) + ginkgo.By(fmt.Sprintf("Creating host-networked pod on non-egress node %s acting as \"another node\"", dstNode.Name)) + _, err = createPod(f, hostNetPod, dstNode.Name, f.Namespace.Name, []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)}, map[string]string{}, func(p *v1.Pod) { p.Spec.HostNetwork = true }) framework.ExpectNoError(err) @@ -426,7 +460,7 @@ spec: return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expectedsrcIP, dstIP, podHTTPPort) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach other node with node's primary ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expectedsrcIP, otherDstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expectedsrcIP, otherDst, podHTTPPort) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach other node with node's secondary ip") } }, @@ -435,7 +469,12 @@ spec: ) ginkgo.DescribeTable("Should validate pods' egress is SNATed to the LB's ingress ip with selectors", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) + externalContainerIP := externalContainer.GetIPv4() + if protocol == v1.IPv6Protocol { + externalContainerIP = externalContainer.GetIPv6() + } ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} index := 0 @@ -483,18 +522,18 @@ spec: node, egressHostV4IP, egressHostV6IP := getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) gomega.Expect(node.Name).To(gomega.Equal(firstNode), "the wrong node got selected for egress service") ginkgo.By("Setting the static route on the external container for the service via the first node's ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) ginkgo.By("Verifying the pods reach the external container with the service's ingress ip") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()) }, 5*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -503,7 +542,7 @@ spec: ginkgo.By("Verifying the external container can reach all of the service's backend pods") // This is to be sure we did not break ingress traffic for the service - reachAllServiceBackendsFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort, pods) + reachAllServiceBackendsFromExternalContainer(externalContainer, svcIP, podHTTPPort, pods) ginkgo.By("Updating the egress service to select the 
second node") secondNode := nodes[1].Name @@ -532,18 +571,18 @@ spec: gomega.Expect(len(nodeList.Items)).To(gomega.Equal(1), fmt.Sprintf("expected only one node labeled for the service, got %v", nodeList.Items)) ginkgo.By("Setting the static route on the external container for the service via the second node's ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) ginkgo.By("Verifying the pods reach the external container with the service's ingress ip again") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()) }, 5*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -551,7 +590,7 @@ spec: }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") ginkgo.By("Verifying the external container can reach all of the service's backend pods") - reachAllServiceBackendsFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort, pods) + reachAllServiceBackendsFromExternalContainer(externalContainer, svcIP, podHTTPPort, pods) ginkgo.By("Updating the egress service selector to match no node") egressServiceConfig = fmt.Sprintf(` @@ -592,11 +631,11 @@ spec: } gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, externalContainerIP, externalContainer.GetPortStr()) }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, externalContainerIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") } @@ -627,32 +666,37 @@ spec: gomega.Expect(len(nodeList.Items)).To(gomega.Equal(1), fmt.Sprintf("expected only one node labeled for the service, got %v", nodeList.Items)) ginkgo.By("Setting the static route on the external container for the service via the third node's ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) ginkgo.By("Verifying the pods reach the external container with the service's ingress ip again") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()) }, 5*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to 
reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, externalContainerIP, externalContainer.GetPortStr()); err != nil { return err } } return nil }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") - reachAllServiceBackendsFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort, pods) + reachAllServiceBackendsFromExternalContainer(externalContainer, svcIP, podHTTPPort, pods) }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", v1.IPv4Protocol), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), ) ginkgo.DescribeTable("Should validate egress service has higher priority than EgressIP when not assigned to the same node", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) + dstIP := externalContainer.GetIPv4() + if protocol == v1.IPv6Protocol { + dstIP = externalContainer.GetIPv6() + } labels := map[string]string{"wants": "egress"} ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} @@ -700,7 +744,7 @@ spec: _, egressHostV4IP, egressHostV6IP := getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) ginkgo.By("Setting the static route on the external container for the service via the egress host ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) // Assign the egress IP without conflicting with any node IP, // the kind subnet is /16 or /64 so the following should be fine. 
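For context, a minimal sketch (not part of the patch) of the allocation approach the next hunk switches to: instead of copying a node IP and incrementing one of its bytes, a free address is requested from the infra provider's primary network. ipalloc.NewPrimaryIPv4, ipalloc.NewPrimaryIPv6 and IsIPv6Cluster are taken from this patch and assumed to behave as used there; the allocateEgressIP wrapper and the import set are hypothetical and shown only for illustration.

import (
	"net"

	"k8s.io/client-go/kubernetes"

	"github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc"
)

// allocateEgressIP asks the provider's primary network allocator for a free
// address of the cluster's IP family, so it cannot collide with any node IP.
// IsIPv6Cluster is assumed to be the helper already present in this test package.
func allocateEgressIP(cs kubernetes.Interface) (net.IP, error) {
	if IsIPv6Cluster(cs) {
		return ipalloc.NewPrimaryIPv6()
	}
	return ipalloc.NewPrimaryIPv4()
}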
@@ -710,15 +754,14 @@ spec: defer func() { e2ekubectl.RunKubectlOrDie("default", "label", "node", eipNode.Name, "k8s.ovn.org/egress-assignable-") }() - nodev4IP, nodev6IP := getNodeAddresses(&eipNode) - egressNodeIP := net.ParseIP(nodev4IP) - if utilnet.IsIPv6String(svcIP) { - egressNodeIP = net.ParseIP(nodev6IP) + // allocate EIP IP + var egressIP net.IP + if IsIPv6Cluster(f.ClientSet) { + egressIP, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP, err = ipalloc.NewPrimaryIPv4() } - egressIP := make(net.IP, len(egressNodeIP)) - copy(egressIP, egressNodeIP) - egressIP[len(egressIP)-2]++ - + framework.ExpectNoError(err, "must allocate new primary network IP address") egressIPYaml := "egressip.yaml" egressIPConfig := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -750,16 +793,40 @@ spec: e2ekubectl.RunKubectlOrDie("default", "delete", "eip", "egress-svc-test-eip") }() + ginkgo.By("wait until egress IP is assigned") + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + egressIPs := egressIPs{} + egressIPStdout, err := e2ekubectl.RunKubectl("default", "get", "eip", "-o", "json") + if err != nil { + framework.Logf("Error: failed to get the EgressIP object, err: %v", err) + return false, nil + } + err = json.Unmarshal([]byte(egressIPStdout), &egressIPs) + if err != nil { + panic(err.Error()) + } + if len(egressIPs.Items) == 0 { + return false, nil + } + if len(egressIPs.Items) > 1 { + framework.Failf("Didn't expect to retrieve more than one egress IP during the execution of this test, saw: %v", len(egressIPs.Items)) + } + return len(egressIPs.Items[0].Status.Items) > 0, nil + }) + if err != nil { + framework.Failf("Error: expected to have 1 egress IP assignment, got: 0") + } + ginkgo.By("Verifying the pods reach the external container with the service's ingress ip") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) }, 5*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -768,27 +835,32 @@ spec: ginkgo.By("Verifying the external container can reach all of the service's backend pods") // This is to be sure we did not break ingress traffic for the service - reachAllServiceBackendsFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort, pods) + reachAllServiceBackendsFromExternalContainer(externalContainer, svcIP, podHTTPPort, pods) ginkgo.By("Deleting the EgressService the backend pods should exit with the EgressIP") e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "delete", "-f", egressServiceYAML) for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, egressIP.String(), *dstIP, podHTTPPort) - }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with eip") + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, egressIP.String(), dstIP, externalContainer.GetPortStr()) + }, 10*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), 
"failed to reach external container with eip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, egressIP.String(), *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, egressIP.String(), dstIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with eip") } }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", v1.IPv4Protocol), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), ) ginkgo.DescribeTable("Should validate a node with a local ep is selected when ETP=Local", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily) { + skipIfProtoNotAvailableFn(protocol, externalContainer) + dstIP := externalContainer.GetIPv4() + if protocol == v1.IPv6Protocol { + dstIP = externalContainer.GetIPv6() + } ginkgo.By("Creating two backend pods on the second node") firstNode := nodes[0].Name secondNode := nodes[1].Name @@ -869,18 +941,18 @@ spec: gomega.Expect(node.Name).To(gomega.Equal(firstNode), "the wrong node got selected for egress service") ginkgo.By("Setting the static route on the external container for the service via the first node's ip") - setSVCRouteOnContainer(externalKindContainerName, svcIP, egressHostV4IP, egressHostV6IP) + setSVCRouteOnExternalContainer(externalContainer, svcIP, egressHostV4IP, egressHostV6IP) ginkgo.By("Verifying the pods reach the external container with the service's ingress ip") for _, pod := range pods { gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()) }, 5*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with loadbalancer's ingress ip") } gomega.Consistently(func() error { for _, pod := range pods { - if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, *dstIP, podHTTPPort); err != nil { + if err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, svcIP, dstIP, externalContainer.GetPortStr()); err != nil { return err } } @@ -921,20 +993,20 @@ spec: } gomega.Eventually(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") gomega.Consistently(func() error { - return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, *dstIP, podHTTPPort) + return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dstIP, externalContainer.GetPortStr()) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach external container with node's ip") } }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", v1.IPv4Protocol), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), ) ginkgo.DescribeTable("[LGW] Should validate ingress reply traffic uses the Network", - func(protocol v1.IPFamily, dstIP *string) { + func(protocol v1.IPFamily, isIPv6 bool) { ginkgo.By("Creating the backend pods") podsCreateSync := errgroup.Group{} createdPods := 
[]*v1.Pod{} @@ -990,29 +1062,34 @@ spec: gomega.Expect(err).ToNot(gomega.HaveOccurred()) ginkgo.By("Setting the routes on the external container to reach the service") - v4Via, v6Via := getContainerAddressesForNetwork(createdPods[0].Spec.NodeName, primaryNetworkName) // if it's host=ALL, just pick a node with an ep + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "provider primary network must be available") + nodeNetworkInterface, err := infraprovider.Get().GetK8NodeNetworkInterface(createdPods[0].Spec.NodeName, providerPrimaryNetwork) + framework.ExpectNoError(err, "Node %s network %s information must be available", createdPods[0].Spec.NodeName, providerPrimaryNetwork.Name()) + v4Via, v6Via := nodeNetworkInterface.IPv4, nodeNetworkInterface.IPv6 // if it's host=ALL, just pick a node with an ep if sourceIPBy == "LoadBalancerIP" { _, v4Via, v6Via = getEgressSVCHost(f.ClientSet, f.Namespace.Name, serviceName) } - setSVCRouteOnContainer(externalKindContainerName, svcIP, v4Via, v6Via) + + setSVCRouteOnContainer(externalContainer, svcIP, v4Via, v6Via) ginkgo.By("Verifying the external client can reach the service") gomega.Eventually(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) return err }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to eventually reach service from external container") gomega.Consistently(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) return err }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach service from external container") ginkgo.By("Setting the blackhole on the custom network") - setBlackholeRoutingTableOnNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + setBlackholeRoutingTableOnNodes(providerCtx, nodes, externalContainer, blackholeRoutingTable, protocol == v1.IPv4Protocol) ginkgo.By("Verifying the external client can't reach the pods due to reply traffic hitting the blackhole in the custom network") gomega.Consistently(func() error { - out, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + out, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) if err != nil && !strings.Contains(err.Error(), "exit status 28") { return fmt.Errorf("expected err to be a connection timed out due to blackhole, got: %w", err) } @@ -1024,15 +1101,15 @@ spec: }, 3*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "managed to reach service despite blackhole") ginkgo.By("Removing the blackhole to the external container it should be able to reach the pods") - delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + delExternalClientBlackholeFromNodes(nodes, blackholeRoutingTable, externalContainer.GetIPv4(), externalContainer.GetIPv6(), protocol == v1.IPv4Protocol) gomega.Eventually(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, 
podHTTPPort) return err }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to eventually reach service from external container") gomega.Consistently(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) return err }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach service from external container") } @@ -1043,7 +1120,7 @@ spec: updateEgressServiceAndCheck("Network", v1.ServiceExternalTrafficPolicyLocal) ginkgo.By("Setting the blackhole on the custom network") - setBlackholeRoutingTableOnNodes(nodes, blackholeRoutingTable, externalKindIPv4, externalKindIPv6, protocol == v1.IPv4Protocol) + setBlackholeRoutingTableOnNodes(providerCtx, nodes, externalContainer, blackholeRoutingTable, protocol == v1.IPv4Protocol) ginkgo.By("Deleting the EgressService the external client should be able to reach the service") egressServiceConfig := fmt.Sprint(` apiVersion: k8s.ovn.org/v1 @@ -1063,17 +1140,17 @@ metadata: }() e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "delete", "-f", egressServiceYAML) gomega.Eventually(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) return err }, 3*time.Second, 500*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to eventually reach service from external container") gomega.Consistently(func() error { - _, err := curlServiceAgnHostHostnameFromExternalContainer(externalKindContainerName, svcIP, podHTTPPort) + _, err := curlServiceAgnHostHostnameFromExternalContainer(externalContainer, svcIP, podHTTPPort) return err }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "failed to reach service from external container") }, - ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, &externalKindIPv4), - ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, &externalKindIPv6), + ginkgo.Entry("ipv4 pods", v1.IPv4Protocol, false), + ginkgo.Entry("ipv6 pods", v1.IPv6Protocol, true), ) ginkgo.Describe("Multiple Networks, external clients sharing ip", func() { @@ -1098,8 +1175,6 @@ metadata: IPv4CIDR string // IPv4CIDR for the container network IPv6CIDR string // IPv6CIDR for the container network containerName string // Container name to create on the network - containerIPv4 string // IPv4 assigned to the created container - containerIPv6 string // IPv6 assigned to the created container routingTable string // Routing table ID to set on nodes/EgressService nodesV4IPs map[string]string // The v4 IPs of the nodes corresponding to this network nodesV6IPs map[string]string // The v6 IPs of the nodes corresponding to this network @@ -1158,55 +1233,48 @@ metadata: ginkgo.By("Setting up the external networks and containers") for _, net := range []*netSettings{net1, net2} { ginkgo.By(fmt.Sprintf("Creating network %s", net.name)) - out, err := runCommand(containerRuntime, "network", "create", net.name, "--ipv6", "--subnet", net.IPv4CIDR, "--subnet", net.IPv6CIDR) - framework.ExpectNoError(err, "failed to create external network %s, out: %s", net.name, out) - + network, err := providerCtx.CreateNetwork(net.name, net.IPv4CIDR, net.IPv6CIDR) + framework.ExpectNoError(err, "failed to create external network %s, out: %s", net.name, err) ginkgo.By(fmt.Sprintf("Creating container %s", 
net.containerName)) // Setting the --hostname here is important since later we poke the container's /hostname endpoint - net.containerIPv4, net.containerIPv6 = createClusterExternalContainer(net.containerName, agnhostImage, - []string{"--privileged", "--network", net.name, "--hostname", net.containerName}, []string{"netexec", fmt.Sprintf("--http-port=%d", podHTTPPort)}) - + extContainerSecondaryNet := infraapi.ExternalContainer{Name: net.containerName, Image: images.AgnHost(), Network: network, + Args: []string{"netexec", "--http-port=8080"}, ExtPort: 8080} + extContainerSecondaryNet, err = providerCtx.CreateExternalContainer(extContainerSecondaryNet) ginkgo.By(fmt.Sprintf("Adding a listener for the shared IPv4 %s on %s", sharedIPv4, net.containerName)) - out, err = runCommand(containerRuntime, "exec", net.containerName, "ip", "address", "add", sharedIPv4+"/32", "dev", "lo") + out, err := infraprovider.Get().ExecExternalContainerCommand(extContainerSecondaryNet, []string{"ip", "address", "add", sharedIPv4 + "/32", "dev", "lo"}) framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the container %s, out: %s", net.containerName, out) ginkgo.By(fmt.Sprintf("Adding a listener for the shared IPv6 %s on %s", sharedIPv6, net.containerName)) - out, err = runCommand(containerRuntime, "exec", net.containerName, "ip", "address", "add", sharedIPv6+"/128", "dev", "lo") + out, err = infraprovider.Get().ExecExternalContainerCommand(extContainerSecondaryNet, []string{"ip", "address", "add", sharedIPv6 + "/128", "dev", "lo"}) framework.ExpectNoError(err, "failed to add the ipv6 loopback ip to dev lo on the container %s, out: %s", net.containerName, out) // Connecting the nodes (kind containers) to the networks and creating the routing table for _, node := range nodes { ginkgo.By(fmt.Sprintf("Connecting container %s to network %s", node.Name, net.name)) - out, err = runCommand(containerRuntime, "network", "connect", net.name, node.Name) - framework.ExpectNoError(err, "failed to connect container %s to external network %s, out: %s", node.Name, net.name, out) + _, err := providerCtx.AttachNetwork(network, node.Name) + framework.ExpectNoError(err, "failed to connect container %s to external network %s", node.Name, net.name) ginkgo.By(fmt.Sprintf("Setting routes on node %s for network %s (table id %s)", node.Name, net.name, net.routingTable)) - out, err = runCommand(containerRuntime, "exec", node.Name, "ip", "route", "add", sharedIPv4, "via", net.containerIPv4, "table", net.routingTable) - framework.ExpectNoError(err, fmt.Sprintf("failed to add route to %s on node %s table %s, out: %s", net.containerIPv4, node.Name, net.routingTable, out)) - - out, err = runCommand(containerRuntime, "exec", node.Name, "ip", "-6", "route", "add", sharedIPv6, "via", net.containerIPv6, "table", net.routingTable) - framework.ExpectNoError(err, fmt.Sprintf("failed to add route to %s on node %s table %s, out: %s", net.containerIPv6, node.Name, net.routingTable, out)) - - v4, v6 := getContainerAddressesForNetwork(node.Name, net.name) - net.nodesV4IPs[node.Name] = v4 - net.nodesV6IPs[node.Name] = v6 - } - } - - }) - - ginkgo.AfterEach(func() { - for _, net := range []*netSettings{net1, net2} { - deleteClusterExternalContainer(net.containerName) - for _, node := range nodes { - out, err := runCommand(containerRuntime, "network", "disconnect", net.name, node.Name) - framework.ExpectNoError(err, "failed to disconnect container %s from external network %s, out: %s", node.Name, net.name, out) + _, err = 
infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "route", "add", sharedIPv4, "via", extContainerSecondaryNet.GetIPv4(), "table", net.routingTable}) + framework.ExpectNoError(err, fmt.Sprintf("failed to add route to %s on node %s table %s", extContainerSecondaryNet.GetIPv4(), node.Name, net.routingTable)) + _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "-6", "route", "add", sharedIPv6, "via", extContainerSecondaryNet.GetIPv6(), "table", net.routingTable}) + framework.ExpectNoError(err, fmt.Sprintf("failed to add route to %s on node %s table %s", extContainerSecondaryNet.GetIPv6(), node.Name, net.routingTable)) + providerCtx.AddCleanUpFn(func() error { + out, err := infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "route", "flush", "table", net.routingTable}) + if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { + return fmt.Errorf("unable to flush table %s on node %s: out: %s, err: %v", net.routingTable, node.Name, out, err) + } + out, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "-6", "route", "flush", "table", net.routingTable}) + if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { + return fmt.Errorf("unable to flush table %s on node %s: out: %s err: %v", net.routingTable, node.Name, out, err) + } + return nil + }) + netNetworkInf, err := infraprovider.Get().GetK8NodeNetworkInterface(node.Name, network) + framework.ExpectNoError(err, "failed to get network interface info for network (%s) on node %s", network, node.Name) + net.nodesV4IPs[node.Name] = netNetworkInf.IPv4 + net.nodesV6IPs[node.Name] = netNetworkInf.IPv6 } - // Remove network after removing the external container and disconnecting the nodes so nothing is attached to it on deletion. - out, err := runCommand(containerRuntime, "network", "rm", net.name) - framework.ExpectNoError(err, "failed to remove external network %s, out: %s", net.name, out) - - flushCustomRoutingTablesOnNodes(nodes, net.routingTable) } }) @@ -1263,7 +1331,9 @@ spec: // We set a route here on the external container to the LB's ingress IP via the first node so it could reach the Service. // In a real scenario an external client might have BGP routes to this IP (via a set of nodes), but setting the first node only // here is enough for the tests (this is different than the SNAT case, where we must set the route via the Service's host). 
- setSVCRouteOnContainer(net.containerName, svcIP, net.nodesV4IPs[nodes[0].Name], net.nodesV6IPs[nodes[0].Name]) + setSVCRouteOnExternalContainer(infraapi.ExternalContainer{Name: net.containerName}, + svcIP, net.nodesV4IPs[nodes[0].Name], net.nodesV6IPs[nodes[0].Name]) + //TODO: figure out if this will persist on target container net.serviceIP = svcIP } @@ -1276,6 +1346,9 @@ spec: expected = net.nodesV6IPs[nodes[i].Name] dst = sharedIPv6 } + cleanUp, err := forwardIPWithIPTables(dst) + ginkgo.DeferCleanup(cleanUp) + framework.ExpectNoError(err, "must add rules to always forward IP") gomega.Eventually(func() error { return curlAgnHostClientIPFromPod(f.Namespace.Name, pod, expected, dst, podHTTPPort) @@ -1289,7 +1362,11 @@ spec: return curlAgnHostHostnameFromPod(f.Namespace.Name, pod, net.containerName, dst, podHTTPPort) }, 1*time.Second, 200*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "reached an external container with the wrong hostname") } - reachAllServiceBackendsFromExternalContainer(net.containerName, net.serviceIP, podHTTPPort, net.createdPods) + cleanUp, err := forwardIPWithIPTables(net.serviceIP) + ginkgo.DeferCleanup(cleanUp) + framework.ExpectNoError(err, "must add rules to always forward IP") + //FIXME(mk): whole test case is broken for multi platform + reachAllServiceBackendsFromExternalContainer(infraapi.ExternalContainer{Name: net.containerName}, net.serviceIP, podHTTPPort, net.createdPods) } ginkgo.By("Deleting the EgressServices the backend pods should not be able to reach the client (no routes to the shared IPs)") @@ -1303,7 +1380,7 @@ spec: gomega.Consistently(func() error { for _, pod := range append(net1.createdPods, net2.createdPods...) { - err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, "", dst, podHTTPPort) + err := curlAgnHostClientIPFromPod(f.Namespace.Name, pod, "", dst, externalContainer.GetPortStr()) if err != nil && (strings.Contains(err.Error(), fmt.Sprintf("exit code 28")) || // github runners don't have any routes for IPv6, so we get CURLE_COULDNT_CONNECT (protocol == v1.IPv6Protocol && strings.Contains(err.Error(), fmt.Sprintf("exit code 7")))) { @@ -1315,8 +1392,8 @@ spec: return nil }, 2*time.Second, 400*time.Millisecond).ShouldNot(gomega.HaveOccurred(), "managed to reach external container despite having no routes") - reachAllServiceBackendsFromExternalContainer(net1.containerName, net1.serviceIP, podHTTPPort, net1.createdPods) - reachAllServiceBackendsFromExternalContainer(net2.containerName, net2.serviceIP, podHTTPPort, net2.createdPods) + reachAllServiceBackendsFromExternalContainer(infraapi.ExternalContainer{Name: net1.containerName}, net1.serviceIP, podHTTPPort, net1.createdPods) + reachAllServiceBackendsFromExternalContainer(infraapi.ExternalContainer{Name: net2.containerName}, net2.serviceIP, podHTTPPort, net2.createdPods) }, ginkgo.Entry("ipv4 pods", v1.IPv4Protocol), ginkgo.Entry("ipv6 pods", v1.IPv6Protocol), @@ -1325,7 +1402,10 @@ spec: }) // Creates a LoadBalancer service with the given IP and verifies it was set correctly. 
-func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name string, protocol v1.IPFamily, selector map[string]string, port int32, tweak ...func(svc *v1.Service)) *v1.Service { +func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name string, protocol v1.IPFamily, selector map[string]string, + port string, tweak ...func(svc *v1.Service)) *v1.Service { + portInt, err := strconv.Atoi(port) + framework.ExpectNoError(err, "port must be an integer", port) svc := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, @@ -1336,7 +1416,7 @@ func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name strin Ports: []v1.ServicePort{ { Protocol: v1.ProtocolTCP, - Port: port, + Port: int32(portInt), }, }, Type: v1.ServiceTypeLoadBalancer, @@ -1348,7 +1428,7 @@ func createLBServiceWithIngressIP(cs kubernetes.Interface, namespace, name strin f(svc) } - svc, err := cs.CoreV1().Services(namespace).Create(context.TODO(), svc, metav1.CreateOptions{}) + svc, err = cs.CoreV1().Services(namespace).Create(context.TODO(), svc, metav1.CreateOptions{}) framework.ExpectNoError(err, "failed to create loadbalancer service") gomega.Eventually(func() error { @@ -1437,22 +1517,39 @@ func getEgressSVCHost(cs kubernetes.Interface, svcNamespace, svcName string) (*v // Sets the route to the service via the egress host on the container. // In a real cluster an external client gets a route for the LoadBalancer service -// from the LoadBalancer provider. -func setSVCRouteOnContainer(container, svcIP, v4Via, v6Via string) { +// from the LoadBalancer infra. +func setSVCRouteOnExternalContainer(container infraapi.ExternalContainer, svcIP, v4Via, v6Via string) { if utilnet.IsIPv4String(svcIP) { - out, err := runCommand(containerRuntime, "exec", container, "ip", "route", "replace", svcIP, "via", v4Via) + out, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"ip", "route", "replace", svcIP, "via", v4Via}) framework.ExpectNoError(err, "failed to add the service host route on the external container %s, out: %s", container, out) return } + out, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"ip", "-6", "route", "replace", svcIP, "via", v6Via}) + framework.ExpectNoError(err, "failed to add the service host route on the external container %s, out: %s", container, out) +} - out, err := runCommand(containerRuntime, "exec", container, "ip", "-6", "route", "replace", svcIP, "via", v6Via) +// Sets the route to the service via the egress host on the container. +// In a real cluster an external client gets a route for the LoadBalancer service +// from the LoadBalancer provider. +func setSVCRouteOnContainer(container infraapi.ExternalContainer, svcIP, v4Via, v6Via string) { + var out string + var err error + if utilnet.IsIPv4String(svcIP) { + out, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{ + "ip", "route", "replace", svcIP, "via", v4Via, + }) + } else { + out, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{ + "ip", "-6", "route", "replace", svcIP, "via", v6Via, + }) + } framework.ExpectNoError(err, "failed to add the service host route on the external container %s, out: %s", container, out) } // Sends a request to an agnhost destination's /clientip which returns the source IP of the packet. // Returns an error if the expectedIP is different than the response. 
-func curlAgnHostClientIPFromPod(namespace, pod, expectedIP, dstIP string, containerPort int) error { - dst := net.JoinHostPort(dstIP, fmt.Sprint(containerPort)) +func curlAgnHostClientIPFromPod(namespace, pod, expectedIP, dstIP, containerPort string) error { + dst := net.JoinHostPort(dstIP, containerPort) curlCmd := fmt.Sprintf("curl -s --retry-connrefused --retry 2 --max-time 0.5 --connect-timeout 0.5 --retry-delay 1 http://%s/clientip", dst) out, err := e2epodoutput.RunHostCmd(namespace, pod, curlCmd) if err != nil { @@ -1468,10 +1565,10 @@ func curlAgnHostClientIPFromPod(namespace, pod, expectedIP, dstIP string, contai return nil } -func curlServiceAgnHostHostnameFromExternalContainer(container, svcIP string, svcPort int32) (string, error) { - dst := net.JoinHostPort(svcIP, fmt.Sprint(svcPort)) - out, err := runCommand(containerRuntime, "exec", container, "curl", "-s", "--retry-connrefused", "--retry", "2", "--max-time", "0.5", - "--connect-timeout", "0.5", "--retry-delay", "1", fmt.Sprintf("http://%s/hostname", dst)) +func curlServiceAgnHostHostnameFromExternalContainer(container infraapi.ExternalContainer, svcIP, svcPort string) (string, error) { + dst := net.JoinHostPort(svcIP, svcPort) + out, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"curl", "-s", "--retry-connrefused", "--retry", "2", "--max-time", "0.5", + "--connect-timeout", "0.5", "--retry-delay", "1", fmt.Sprintf("http://%s/hostname", dst)}) if err != nil { return out, err } @@ -1481,8 +1578,8 @@ func curlServiceAgnHostHostnameFromExternalContainer(container, svcIP string, sv // Sends a request to an agnhost destination's /hostname which returns the hostname of the server. // Returns an error if the expectedHostname is different than the response. -func curlAgnHostHostnameFromPod(namespace, pod, expectedHostname, dstIP string, containerPort int) error { - dst := net.JoinHostPort(dstIP, fmt.Sprint(containerPort)) +func curlAgnHostHostnameFromPod(namespace, pod, expectedHostname, dstIP string, containerPort string) error { + dst := net.JoinHostPort(dstIP, containerPort) curlCmd := fmt.Sprintf("curl -s --retry-connrefused --retry 2 --max-time 0.5 --connect-timeout 0.5 --retry-delay 1 http://%s/hostname", dst) out, err := e2epodoutput.RunHostCmd(namespace, pod, curlCmd) if err != nil { @@ -1496,7 +1593,7 @@ func curlAgnHostHostnameFromPod(namespace, pod, expectedHostname, dstIP string, } // Tries to reach all of the backends of the given service from the container. -func reachAllServiceBackendsFromExternalContainer(container, svcIP string, svcPort int32, svcPods []string) { +func reachAllServiceBackendsFromExternalContainer(container infraapi.ExternalContainer, svcIP, svcPort string, svcPods []string) { backends := map[string]bool{} for _, pod := range svcPods { backends[pod] = true @@ -1520,63 +1617,67 @@ func reachAllServiceBackendsFromExternalContainer(container, svcIP string, svcPo // 2) A blackhole with a higher priority // Then in the actual test we first verify that when the pods are using the custom routing table they can't reach the external container, // remove the blackhole route and verify that they can reach it now. This shows that they actually use a different routing table than the main one. 
-func setBlackholeRoutingTableOnNodes(nodes []v1.Node, routingTable, externalV4, externalV6 string, useV4 bool) { +func setBlackholeRoutingTableOnNodes(providerCtx infraapi.Context, nodes []v1.Node, extContainer infraapi.ExternalContainer, routingTable string, useV4 bool) { for _, node := range nodes { if useV4 { - setBlackholeRoutesOnRoutingTable(node.Name, externalV4, routingTable) + setBlackholeRoutesOnRoutingTable(providerCtx, node.Name, extContainer.GetIPv4(), routingTable) continue } - if externalV6 != "" { - setBlackholeRoutesOnRoutingTable(node.Name, externalV6, routingTable) + if extContainer.IsIPv6() { + setBlackholeRoutesOnRoutingTable(providerCtx, node.Name, extContainer.GetIPv6(), routingTable) } } } // Sets the regular+blackhole routes on the nodes to the external container. -func setBlackholeRoutesOnRoutingTable(container, ip, table string) { +func setBlackholeRoutesOnRoutingTable(providerCtx infraapi.Context, nodeName, ip, table string) { type route struct { Dst string `json:"dst"` Dev string `json:"dev"` } - out, err := runCommand(containerRuntime, "exec", container, "ip", "--json", "route", "get", ip) - framework.ExpectNoError(err, fmt.Sprintf("failed to get default route to %s on node %s, out: %s", ip, container, out)) + out, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "--json", "route", "get", ip}) + framework.ExpectNoError(err, fmt.Sprintf("failed to get default route to %s on node %s, out: %s", ip, nodeName, out)) routes := []route{} err = json.Unmarshal([]byte(out), &routes) - framework.ExpectNoError(err, fmt.Sprintf("failed to parse route to %s on node %s", ip, container)) + framework.ExpectNoError(err, fmt.Sprintf("failed to parse route to %s on node %s", ip, nodeName)) gomega.Expect(routes).ToNot(gomega.HaveLen(0)) routeTo := routes[0] - out, err = runCommand(containerRuntime, "exec", container, "ip", "route", "replace", ip, "dev", routeTo.Dev, "table", table, "prio", "100") - framework.ExpectNoError(err, fmt.Sprintf("failed to set route to %s on node %s table %s, out: %s", ip, container, table, out)) + out, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "route", "replace", ip, "dev", routeTo.Dev, "table", table, "prio", "100"}) + framework.ExpectNoError(err, fmt.Sprintf("failed to set route to %s on node %s table %s, out: %s", ip, nodeName, table, out)) - out, err = runCommand(containerRuntime, "exec", container, "ip", "route", "replace", "blackhole", ip, "table", table, "prio", "50") - framework.ExpectNoError(err, fmt.Sprintf("failed to set blackhole route to %s on node %s table %s, out: %s", ip, container, table, out)) + doesNotExistMsg := "RTNETLINK answers: No such process" + isAlreadyDeletedFn := func(s string) bool { return strings.Contains(s, doesNotExistMsg) } + + providerCtx.AddCleanUpFn(func() error { + out, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "route", "del", "blackhole", ip, "table", table}) + if err != nil && !isAlreadyDeletedFn(err.Error()) { + return fmt.Errorf("failed to remove black hole route in table 100: stdout %q, err: %q", out, err) + } + return nil + }) + + out, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "route", "replace", "blackhole", ip, "table", table, "prio", "50"}) + providerCtx.AddCleanUpFn(func() error { + out, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "route", "del", "blackhole", ip, "table", table}) + if err != nil && !isAlreadyDeletedFn(err.Error()) { + return fmt.Errorf("failed to remove black hole 
route in table 50: stdout %q, err: %q", out, err) + } + return nil + }) + framework.ExpectNoError(err, fmt.Sprintf("failed to set blackhole route to %s on node %s table %s, out: %s", ip, nodeName, table, out)) } // Removes the blackhole route to the external container on the nodes. func delExternalClientBlackholeFromNodes(nodes []v1.Node, routingTable, externalV4, externalV6 string, useV4 bool) { for _, node := range nodes { if useV4 { - out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "del", "blackhole", externalV4, "table", routingTable) + out, err := infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "route", "del", "blackhole", externalV4, "table", routingTable}) framework.ExpectNoError(err, fmt.Sprintf("failed to delete blackhole route to %s on node %s table %s, out: %s", externalV4, node.Name, routingTable, out)) continue } - out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "del", "blackhole", externalV6, "table", routingTable) + out, err := infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"ip", "route", "del", "blackhole", externalV6, "table", routingTable}) framework.ExpectNoError(err, fmt.Sprintf("failed to delete blackhole route to %s on node %s table %s, out: %s", externalV6, node.Name, routingTable, out)) } } - -// Flush the custom routing table from all of the nodes. -func flushCustomRoutingTablesOnNodes(nodes []v1.Node, routingTable string) { - for _, node := range nodes { - out, err := runCommand(containerRuntime, "exec", node.Name, "ip", "route", "flush", "table", routingTable) - if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { - framework.Failf("Unable to flush table %s on node %s: out: %s, err: %v", routingTable, node.Name, out, err) - } - out, err = runCommand(containerRuntime, "exec", node.Name, "ip", "-6", "route", "flush", "table", routingTable) - if err != nil && !strings.Contains(err.Error(), "FIB table does not exist") { - framework.Failf("Unable to flush table %s on node %s: out: %s err: %v", routingTable, node.Name, out, err) - } - } -} diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 7ca6ed7575..162af8fad0 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "math/rand" "net" "os" "reflect" @@ -14,13 +13,19 @@ import ( "strings" "time" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/ginkgo/v2/dsl/table" + "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" - "github.com/onsi/ginkgo/v2" - "github.com/onsi/ginkgo/v2/dsl/table" - "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -38,11 +43,10 @@ const ( DEFAULT_OVN_EGRESSIP_GRPC_HEALTHCHECK_PORT = "9107" OVN_EGRESSIP_LEGACY_HEALTHCHECK_PORT_ENV = "0" // the env value to enable legacy health check OVN_EGRESSIP_LEGACY_HEALTHCHECK_PORT = "9" // the actual port used by legacy health check - primaryNetworkName = "kind" 
secondaryIPV4Subnet = "10.10.10.0/24" secondaryIPV6Subnet = "2001:db8:abcd:1234::/64" - secondaryNetworkName = "secondary-network" - httpdContainerImageName = "docker.io/httpd:latest" + secondaryNetworkName = "secondarynetwork" + aghHostNetexecSrcIPPath = "/clientip" ) func labelNodeForEgress(f *framework.Framework, nodeName string) { @@ -100,13 +104,14 @@ func (h *egressNodeAvailabilityHandlerViaHealthCheck) checkMode(restore bool) (s // to restore to. return "", "", false } + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() framework.Logf("Checking the ovnkube-node and ovnkube-master (ovnkube-cluster-manager if interconnect=true) healthcheck ports in use") - portNode := getTemplateContainerEnv(ovnNamespace, "daemonset/ovnkube-node", getNodeContainerName(), OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) + portNode := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) var portMaster string if isInterconnectEnabled() { - portMaster = getTemplateContainerEnv(ovnNamespace, "deployment/ovnkube-control-plane", "ovnkube-cluster-manager", OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) + portMaster = getTemplateContainerEnv(ovnKubeNamespace, "deployment/ovnkube-control-plane", "ovnkube-cluster-manager", OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) } else { - portMaster = getTemplateContainerEnv(ovnNamespace, "deployment/ovnkube-master", "ovnkube-master", OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) + portMaster = getTemplateContainerEnv(ovnKubeNamespace, "deployment/ovnkube-master", "ovnkube-master", OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME) } wantLegacy := (h.Legacy && !restore) || (h.modeWasLegacy && restore) @@ -149,12 +154,13 @@ func (h *egressNodeAvailabilityHandlerViaHealthCheck) setMode(nodeName string, r portEnv, port, changeEnv := h.checkMode(restore) if changeEnv { framework.Logf("Updating ovnkube to use health check on port %s (0 is legacy, non 0 is GRPC)", portEnv) + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() setEnv := map[string]string{OVN_EGRESSIP_HEALTHCHECK_PORT_ENV_NAME: portEnv} - setUnsetTemplateContainerEnv(h.F.ClientSet, ovnNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) + setUnsetTemplateContainerEnv(h.F.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) if isInterconnectEnabled() { - setUnsetTemplateContainerEnv(h.F.ClientSet, ovnNamespace, "deployment/ovnkube-control-plane", "ovnkube-cluster-manager", setEnv) + setUnsetTemplateContainerEnv(h.F.ClientSet, ovnKubeNamespace, "deployment/ovnkube-control-plane", "ovnkube-cluster-manager", setEnv) } else { - setUnsetTemplateContainerEnv(h.F.ClientSet, ovnNamespace, "deployment/ovnkube-master", "ovnkube-master", setEnv) + setUnsetTemplateContainerEnv(h.F.ClientSet, ovnKubeNamespace, "deployment/ovnkube-master", "ovnkube-master", setEnv) } } if port != "" { @@ -183,64 +189,44 @@ func (h *egressNodeAvailabilityHandlerViaHealthCheck) Disable(nodeName string) { h.setMode(nodeName, true, false) } -type egressIPStatus struct { - Node string `json:"node"` - EgressIP string `json:"egressIP"` -} - -type egressIP struct { - Status struct { - Items []egressIPStatus `json:"items"` - } `json:"status"` -} -type egressIPs struct { - Items []egressIP `json:"items"` -} - type node struct { name string nodeIP string + port uint16 } -func configNetworkAndGetTarget(subnet string, nodesToAttachNet []string, v6 bool, targetSecondaryNode node) (string, string) { - // configure and add additional 
network to worker containers for EIP multi NIC feature - createNetwork(secondaryNetworkName, subnet, v6) - if v6 { - // HACK: ensure bridges don't talk to each other. For IPv6, docker support for isolated networks is experimental. - // Remove when it is no longer experimental. See func description for full details. - if err := isolateIPv6Networks(primaryNetworkName, secondaryNetworkName); err != nil { - framework.Failf("failed to isolate IPv6 networks: %v", err) - } - } - for _, nodeName := range nodesToAttachNet { - attachNetwork(secondaryNetworkName, nodeName) - } - v4Addr, v6Addr := createClusterExternalContainer(targetSecondaryNode.name, httpdContainerImageName, []string{"--network", secondaryNetworkName, "-P"}, []string{}) - if v4Addr == "" && !v6 { - panic("failed to get v4 address") - } - if v6Addr == "" && v6 { - panic("failed to get v6 address") +func getLastLogLine(data string) string { + data = strings.TrimSuffix(data, "\n") + logLines := strings.Split(data, "\n") + if len(logLines) == 0 { + return "" } - return v4Addr, v6Addr + return logLines[len(logLines)-1] } -func tearDownNetworkAndTargetForMultiNIC(nodeToDetachNet []string, targetSecondaryNode node) { - deleteClusterExternalContainer(targetSecondaryNode.name) - for _, nodeName := range nodeToDetachNet { - detachNetwork(secondaryNetworkName, nodeName) +// checks if the given IP is found. If there are multiple lines, only consider the last line. +func containsIPInLastEntry(data, ip string) bool { + if strings.Contains(getLastLogLine(data), ip) { + + return true } - deleteNetwork(secondaryNetworkName) + return false } -func removeSliceElement(s []string, i int) []string { - s[i] = s[len(s)-1] - return s[:len(s)-1] +// support for agnhost image is limited to netexec command +func isSupportedAgnhostForEIP(externalContainer infraapi.ExternalContainer) bool { + if externalContainer.Image != images.AgnHost() { + return false + } + if !util.SliceHasStringItem(externalContainer.Args, "netexec") { + return false + } + return true } -// targetExternalContainerAndTest targets the external test container from +// targetHostNetworkContainerAndTest targets the internal host network test container from // our test pods, collects its logs and verifies that the logs have traces -// of the `verifyIPs` provided. We need to target the external test +// of the `verifyIPs` provided. We need to target the test // container multiple times until we verify that all IPs provided by // `verifyIPs` have been verified. This is done by passing it a slice of // verifyIPs and removing each item when it has been found. This function is @@ -251,9 +237,13 @@ func removeSliceElement(s []string, i int) []string { // remove it from the list of verifyIPs, see that it's length is not 0 and // retry again. We do this until all IPs have been seen. If that never // happens (because of a bug) the test fails. 
-func targetExternalContainerAndTest(targetNode node, podName, podNamespace string, expectSuccess bool, verifyIPs []string) wait.ConditionFunc { +func targetHostNetworkContainerAndTest(targetNode node, podNamespace, podName string, expectSuccess bool, verifyIPs []string) wait.ConditionFunc { + // we only know how to extract src IP from agnhost host configured with netexec and curling path /clientip to return + // the src IP + return func() (bool, error) { - _, err := e2ekubectl.RunKubectl(podNamespace, "exec", podName, "--", "curl", "--connect-timeout", "2", net.JoinHostPort(targetNode.nodeIP, "80")) + clientStdOut, err := e2ekubectl.RunKubectl(podNamespace, "exec", podName, "--", "curl", "--connect-timeout", "2", + net.JoinHostPort(targetNode.nodeIP, fmt.Sprintf("%d", targetNode.port))+aghHostNetexecSrcIPPath) if err != nil { if !expectSuccess { // curl should timeout with a string containing this error, and this should be the case if we expect a failure @@ -265,47 +255,130 @@ func targetExternalContainerAndTest(targetNode node, podName, podNamespace strin } return false, nil } - var targetNodeLogs string - if strings.Contains(targetNode.name, "-host-net-pod") { - // host-networked-pod - targetNodeLogs, err = e2ekubectl.RunKubectl(podNamespace, "logs", targetNode.name) + // we determine the src IP based on the target image + // agnhost netexec will return the source IP as payload + for _, expectedIP := range verifyIPs { + if containsIPInLastEntry(clientStdOut, expectedIP) { + verifyIPs = util.RemoveItemFromSliceUnstable(verifyIPs, expectedIP) + } + } + + if len(verifyIPs) != 0 && expectSuccess { + framework.Logf("the test host network container did not have any trace of the IPs: %v being logged, last logs: %s", verifyIPs, getLastLogLine(clientStdOut)) + return false, nil + } + if !expectSuccess && len(verifyIPs) == 0 { + framework.Logf("the test host network container did have a trace of the IPs: %v being logged, it should not have, last logs: %s", verifyIPs, getLastLogLine(clientStdOut)) + return false, nil + } + return true, nil + } +} + +// targetExternalContainerAndTest targets the external test container from +// our test pods, collects its logs and verifies that the logs have traces +// of the `verifyIPs` provided. We need to target the external test +// container multiple times until we verify that all IPs provided by +// `verifyIPs` have been verified. This is done by passing it a slice of +// verifyIPs and removing each item when it has been found. This function is +// wrapped in a `wait.PollImmediate` which results in the fact that it only +// passes once verifyIPs is of length 0. targetExternalContainerAndTest +// initiates only a single connection at a time, sequentially, hence: we +// perform one connection attempt, check that the IP seen is expected, +// remove it from the list of verifyIPs, see that its length is not 0 and +// retry again. We do this until all IPs have been seen. If that never +// happens (because of a bug) the test fails. 
+func targetExternalContainerAndTest(externalContainer infraapi.ExternalContainer, podNamespace, podName string, expectSuccess bool, verifyIPs []string) wait.ConditionFunc { + // we only know how to extract src IP from agnhost host configured with netexec and curling path /clientip to return + // the src IP + if !isSupportedAgnhostForEIP(externalContainer) { + panic("unsupported image") + } + // first try to select the same IP family as IP(s) we are trying to verify. + // if no verify IPs exist, pick v4 or v6 depending on whats available. + var targetIP string + if len(verifyIPs) > 0 { + ip := verifyIPs[0] + if utilnet.IsIPv4String(ip) { + targetIP = externalContainer.GetIPv4() } else { - // external container - targetNodeLogs, err = runCommand(containerRuntime, "logs", targetNode.name) + targetIP = externalContainer.GetIPv6() } + } else { + // pick the first available IP family + if externalContainer.IsIPv4() { + targetIP = externalContainer.GetIPv4() + } else { + targetIP = externalContainer.GetIPv6() + } + } + if targetIP == "" { + framework.Fail("target container IP is not set") + } + URL := net.JoinHostPort(targetIP, externalContainer.GetPortStr()) + aghHostNetexecSrcIPPath + + return func() (bool, error) { + clientStdOut, err := e2ekubectl.RunKubectl(podNamespace, "exec", podName, "--", "curl", "--connect-timeout", "2", URL) if err != nil { - framework.Logf("failed to inspect logs in test container: %v", err) + if !expectSuccess { + // curl should timeout with a string containing this error, and this should be the case if we expect a failure + if !strings.Contains(strings.ToLower(err.Error()), " timeout ") { + framework.Logf("the test expected netserver container to not be able to connect, but it did with another error, err : %v", err) + return false, nil + } + return true, nil + } return false, nil } - targetNodeLogs = strings.TrimSuffix(targetNodeLogs, "\n") - logLines := strings.Split(targetNodeLogs, "\n") - lastLine := logLines[len(logLines)-1] - for i := 0; i < len(verifyIPs); i++ { - if strings.Contains(lastLine, verifyIPs[i]) { - verifyIPs = removeSliceElement(verifyIPs, i) - break + // we determine the src IP based on the target image + // agnhost netexec will return the source IP as payload + switch externalContainer.Image { + case images.AgnHost(): + for _, expectedIP := range verifyIPs { + if containsIPInLastEntry(clientStdOut, expectedIP) { + verifyIPs = util.RemoveItemFromSliceUnstable(verifyIPs, expectedIP) + } } + default: + panic("unimplemented container image") } if len(verifyIPs) != 0 && expectSuccess { - framework.Logf("the test external container did not have any trace of the IPs: %v being logged, last logs: %s", verifyIPs, logLines[len(logLines)-1]) + framework.Logf("the test external container did not have any trace of the IPs: %v being logged, last logs: %s", verifyIPs, getLastLogLine(clientStdOut)) return false, nil } if !expectSuccess && len(verifyIPs) == 0 { - framework.Logf("the test external container did have a trace of the IPs: %v being logged, it should not have, last logs: %s", verifyIPs, logLines[len(logLines)-1]) + framework.Logf("the test external container did have a trace of the IPs: %v being logged, it should not have, last logs: %s", verifyIPs, getLastLogLine(clientStdOut)) return false, nil } return true, nil } } +func removeSliceElement(s []string, i int) []string { + s[i] = s[len(s)-1] + return s[:len(s)-1] +} + +type egressIPStatus struct { + Node string `json:"node"` + EgressIP string `json:"egressIP"` +} + +type egressIP struct { + Status 
struct { + Items []egressIPStatus `json:"items"` + } `json:"status"` +} +type egressIPs struct { + Items []egressIP `json:"items"` +} + var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigParams networkAttachmentConfigParams) { //FIXME: tests for CDN are designed for single stack clusters (IPv4 or IPv6) and must choose a single IP family for dual stack clusters. // Remove this restriction and allow the tests to detect if an IP family support is available. const ( - servicePort int32 = 9999 - echoServerPodPortMin = 9900 - echoServerPodPortMax = 9999 - podHTTPPort string = "8080" + clusterIPPort uint16 = 9999 + clusterNetworkHTTPPort uint16 = 8080 egressIPName string = "egressip" egressIPName2 string = "egressip-2" targetNodeName string = "egressTargetNode-allowed" @@ -313,7 +390,6 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa targetSecondaryNodeName string = "egressSecondaryTargetNode-allowed" egressIPYaml string = "egressip.yaml" egressFirewallYaml string = "egressfirewall.yaml" - ciNetworkName = "kind" retryTimeout = 3 * retryTimeout // Boost the retryTimeout for EgressIP tests. ) @@ -322,16 +398,22 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa } var ( - egress1Node, egress2Node, pod1Node, pod2Node, targetNode, deniedTargetNode, targetSecondaryNode node - pod1Name = "e2e-egressip-pod-1" - pod2Name = "e2e-egressip-pod-2" - usedEgressNodeAvailabilityHandler egressNodeAvailabilityHandler - isIPv6TestRun bool + egress1Node, egress2Node, pod1Node, pod2Node node + providerCtx infraapi.Context + primaryTargetExternalContainer infraapi.ExternalContainer + primaryDeniedExternalContainer infraapi.ExternalContainer + secondaryTargetExternalContainer infraapi.ExternalContainer + pod1Name = "e2e-egressip-pod-1" + pod2Name = "e2e-egressip-pod-2" + usedEgressNodeAvailabilityHandler egressNodeAvailabilityHandler + isIPv6TestRun bool ) - targetPodAndTest := func(namespace, fromName, toName, toIP string) wait.ConditionFunc { + targetPodAndTest := func(namespace, fromName, toName, toIP string, toPort uint16) wait.ConditionFunc { return func() (bool, error) { - stdout, err := e2ekubectl.RunKubectl(namespace, "exec", fromName, "--", "curl", "--connect-timeout", "2", fmt.Sprintf("%s/hostname", net.JoinHostPort(toIP, podHTTPPort))) + stdout, err := e2ekubectl.RunKubectl(namespace, "exec", fromName, "--", + "curl", "--connect-timeout", "2", fmt.Sprintf("%s/hostname", + net.JoinHostPort(toIP, fmt.Sprintf("%d", toPort)))) if err != nil || stdout != toName { framework.Logf("Error: attempted connection to pod %s found err: %v", toName, err) return false, nil @@ -345,7 +427,7 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa for _, podName := range podNames { _, err := e2ekubectl.RunKubectl(namespace, "exec", podName, "--", "curl", "--connect-timeout", "2", "-k", destination) if err != nil { - framework.Logf("Error: attempted connection to API server found err: %v", err) + framework.Logf("Error: attempted connection to destination %s failed, found err: %v", destination, err) return false, nil } } @@ -353,14 +435,6 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa } } - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} - - dupIP := func(ip net.IP) net.IP { - dup := make(net.IP, len(ip)) - copy(dup, ip) - return dup - } - waitForStatus := func(node string, isReady bool) { err := 
wait.PollUntilContextTimeout(context.Background(), retryInterval, retryTimeout, true, func(context.Context) (bool, error) { status := getNodeStatus(node) @@ -391,14 +465,21 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa } } - setNodeReady := func(node string, setReady bool) { + setNodeReady := func(providerCtx infraapi.Context, node string, setReady bool) { if !setReady { - _, err := runCommand("docker", "exec", node, "systemctl", "stop", "kubelet.service") + _, err := infraprovider.Get().ExecK8NodeCommand(node, []string{"systemctl", "stop", "kubelet.service"}) if err != nil { framework.Failf("failed to stop kubelet on node: %s, err: %v", node, err) } + providerCtx.AddCleanUpFn(func() error { + _, err := infraprovider.Get().ExecK8NodeCommand(node, []string{"systemctl", "start", "kubelet.service"}) + if err != nil { + return fmt.Errorf("failed to restore kubelet service and ensure it is started on node: %s, err: %v", node, err) + } + return nil + }) } else { - _, err := runCommand("docker", "exec", node, "systemctl", "start", "kubelet.service") + _, err := infraprovider.Get().ExecK8NodeCommand(node, []string{"systemctl", "start", "kubelet.service"}) if err != nil { framework.Failf("failed to start kubelet on node: %s, err: %v", node, err) } @@ -614,6 +695,7 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa // Determine what mode the CI is running in and get relevant endpoint information for the tests ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { @@ -656,35 +738,82 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa name: nodes.Items[1].Name, nodeIP: ips[1], } - targetNode = node{ - name: targetNodeName, - } - deniedTargetNode = node{ - name: deniedTargetNodeName, - } - targetSecondaryNode = node{ - name: targetSecondaryNodeName, - } - isV6 := utilnet.IsIPv6String(egress1Node.nodeIP) - if isV6 { - _, targetNode.nodeIP = createClusterExternalContainer(targetNode.name, httpdContainerImageName, []string{"--network", ciNetworkName, "-P"}, []string{}) - _, deniedTargetNode.nodeIP = createClusterExternalContainer(deniedTargetNode.name, httpdContainerImageName, []string{"--network", ciNetworkName, "-P"}, []string{}) - // configure and add additional network to worker containers for EIP multi NIC feature - _, targetSecondaryNode.nodeIP = configNetworkAndGetTarget(secondaryIPV6Subnet, []string{egress1Node.name, egress2Node.name}, isV6, targetSecondaryNode) - } else { - targetNode.nodeIP, _ = createClusterExternalContainer(targetNode.name, httpdContainerImageName, []string{"--network", ciNetworkName, "-P"}, []string{}) - deniedTargetNode.nodeIP, _ = createClusterExternalContainer(deniedTargetNode.name, httpdContainerImageName, []string{"--network", ciNetworkName, "-P"}, []string{}) - // configure and add additional network to worker containers for EIP multi NIC feature - targetSecondaryNode.nodeIP, _ = configNetworkAndGetTarget(secondaryIPV4Subnet, []string{egress1Node.name, egress2Node.name}, isV6, targetSecondaryNode) - } - // ensure all nodes are ready and reachable for _, node := range nodes.Items { - setNodeReady(node.Name, true) + setNodeReady(providerCtx, node.Name, true) setNodeReachable(node.Name, true) waitForNoTaint(node.Name, "node.kubernetes.io/unreachable") waitForNoTaint(node.Name, "node.kubernetes.io/not-ready") 
} + // Primary provider network + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary provider network") + + // attach containers to the primary network + primaryTargetExternalContainerPort := infraprovider.Get().GetExternalContainerPort() + primaryTargetExternalContainerSpec := infraapi.ExternalContainer{Name: targetNodeName, Image: images.AgnHost(), + Network: primaryProviderNetwork, Args: getAgnHostHTTPPortBindCMDArgs(primaryTargetExternalContainerPort), ExtPort: primaryTargetExternalContainerPort} + primaryTargetExternalContainer, err = providerCtx.CreateExternalContainer(primaryTargetExternalContainerSpec) + framework.ExpectNoError(err, "failed to create external target container on primary network", primaryTargetExternalContainerSpec.String()) + + primaryDeniedExternalContainerPort := infraprovider.Get().GetExternalContainerPort() + primaryDeniedExternalContainerSpec := infraapi.ExternalContainer{Name: deniedTargetNodeName, Image: images.AgnHost(), + Network: primaryProviderNetwork, Args: getAgnHostHTTPPortBindCMDArgs(primaryDeniedExternalContainerPort), ExtPort: primaryDeniedExternalContainerPort} + primaryDeniedExternalContainer, err = providerCtx.CreateExternalContainer(primaryDeniedExternalContainerSpec) + framework.ExpectNoError(err, "failed to create external denied container on primary network", primaryDeniedExternalContainer.String()) + + // Setup secondary provider network + secondarySubnet := secondaryIPV4Subnet + if isIPv6TestRun { + secondarySubnet = secondaryIPV6Subnet + } + // configure and add additional network to worker containers for EIP multi NIC feature + secondaryProviderNetwork, err := providerCtx.CreateNetwork(secondaryNetworkName, secondarySubnet) + framework.ExpectNoError(err, "creation of network %q with subnet %s must succeed", secondaryNetworkName, secondarySubnet) + // this is only required for KinD infra provider + if isIPv6TestRun && infraprovider.Get().Name() == "kind" { + // HACK: ensure bridges don't talk to each other. For IPv6, docker support for isolated networks is experimental. + // Remove when it is no longer experimental. See func description for full details. 
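+ // The isolation itself is assumed to amount to dropping forwarded IPv6 traffic between the two Docker bridges
+ // in both directions, for example (bridge interface names below are placeholders, and the DOCKER-USER hook
+ // chain is assumed to be present in ip6tables):
+ //   ip6tables -I DOCKER-USER -i <primary-bridge> -o <secondary-bridge> -j DROP
+ //   ip6tables -I DOCKER-USER -i <secondary-bridge> -o <primary-bridge> -j DROP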
+ if err := isolateKinDIPv6Networks(primaryProviderNetwork.Name(), secondaryProviderNetwork.Name()); err != nil { + framework.Failf("failed to isolate IPv6 networks: %v", err) + } + } + nodes, err = f.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + framework.ExpectNoError(err, "must list all Nodes") + for _, node := range nodes.Items { + _, err = providerCtx.AttachNetwork(secondaryProviderNetwork, node.Name) + framework.ExpectNoError(err, "network %s must attach to node %s", secondaryProviderNetwork.Name(), node.Name) + } + secondaryTargetExternalContainerPort := infraprovider.Get().GetExternalContainerPort() + secondaryTargetExternalContainerSpec := infraapi.ExternalContainer{ + Name: targetSecondaryNodeName, + Image: images.AgnHost(), + Network: secondaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(secondaryTargetExternalContainerPort), + ExtPort: secondaryTargetExternalContainerPort, + } + secondaryTargetExternalContainer, err = providerCtx.CreateExternalContainer(secondaryTargetExternalContainerSpec) + framework.ExpectNoError(err, "unable to create external container %s", secondaryTargetExternalContainerSpec.Name) + if secondaryTargetExternalContainer.GetIPv4() == "" && !isIPv6TestRun { + panic("failed to get v4 address") + } + if secondaryTargetExternalContainer.GetIPv6() == "" && isIPv6TestRun { + panic("failed to get v6 address") + } + + if isIPv6TestRun { + if !primaryTargetExternalContainer.IsIPv6() || !primaryDeniedExternalContainer.IsIPv6() || !secondaryTargetExternalContainer.IsIPv6() { + framework.Failf("one or more external containers do not have an IPv6 address,"+ + " target primary network %q, denied primary network %q, target secondary network %q", + primaryTargetExternalContainer.GetIPv6(), primaryDeniedExternalContainer.GetIPv6(), secondaryTargetExternalContainer.GetIPv6()) + } + } else { + if !primaryTargetExternalContainer.IsIPv4() || !primaryDeniedExternalContainer.IsIPv4() || !secondaryTargetExternalContainer.IsIPv4() { + framework.Failf("one or more external containers do not have an IPv4 address,"+ + " target primary network %q, denied primary network %q, target secondary network %q", + primaryTargetExternalContainer.GetIPv4(), primaryDeniedExternalContainer.GetIPv4(), secondaryTargetExternalContainer.GetIPv4()) + } + } // no further network creation is required if CDN if isClusterDefaultNetwork(netConfigParams) { return @@ -715,12 +844,10 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa e2ekubectl.RunKubectlOrDie("default", "delete", "eip", egressIPName2, "--ignore-not-found=true") e2ekubectl.RunKubectlOrDie("default", "label", "node", egress1Node.name, "k8s.ovn.org/egress-assignable-") e2ekubectl.RunKubectlOrDie("default", "label", "node", egress2Node.name, "k8s.ovn.org/egress-assignable-") - deleteClusterExternalContainer(targetNode.name) - deleteClusterExternalContainer(deniedTargetNode.name) - tearDownNetworkAndTargetForMultiNIC([]string{egress1Node.name, egress2Node.name}, targetSecondaryNode) + // ensure all nodes are ready and reachable for _, node := range []string{egress1Node.name, egress2Node.name} { - setNodeReady(node, true) + setNodeReady(providerCtx, node, true) setNodeReachable(node, true) waitForNoTaint(node, "node.kubernetes.io/unreachable") waitForNoTaint(node, "node.kubernetes.io/not-ready") @@ -777,14 +904,16 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa updateNamespaceLabels(f, f.Namespace, labels) ginkgo.By("1.
Create an EgressIP object with two egress IPs defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ - egressIP2 := dupIP(egressNodeIP) - egressIP2[len(egressIP2)-2]++ - egressIP2[len(egressIP2)-1]++ + var egressIP1, egressIP2 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + egressIP2, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + egressIP2, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -821,11 +950,12 @@ spec: } ginkgo.By("3. Create two pods matching the EgressIP: one running on each of the egress nodes") - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) + _, err = createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod2Name) - err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { for _, podName := range []string{pod1Name, pod2Name} { kubectlOut := getPodAddress(podName, f.Namespace.Name) srcIP := net.ParseIP(kubectlOut) @@ -851,13 +981,15 @@ spec: } ginkgo.By("4. Check connectivity from both to an external \"node\" and verify that the IPs are both of the above") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIP1.String(), egressIP2.String()})) framework.ExpectNoError(err, "Step 4. Check connectivity from first to an external \"node\" and verify that the IPs are both of the above, failed: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIP1.String(), egressIP2.String()})) framework.ExpectNoError(err, "Step 4. Check connectivity from second to an external \"node\" and verify that the IPs are both of the above, failed: %v", err) ginkgo.By("5. 
Check connectivity from one pod to the other and verify that the connection is achieved") - err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP)) + err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP, clusterNetworkHTTPPort)) framework.ExpectNoError(err, "Step 5. Check connectivity from one pod to the other and verify that the connection is achieved, failed, err: %v", err) ginkgo.By("6. Check connectivity from both pods to the api-server (running hostNetwork:true) and verifying that the connection is achieved") @@ -877,11 +1009,13 @@ spec: updatePod(f, pod2) ginkgo.By("8. Check connectivity from that one to an external \"node\" and verify that the IP is the node IP.") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "Step 8. Check connectivity from that one to an external \"node\" and verify that the IP is the node IP, failed, err: %v", err) ginkgo.By("9. Check connectivity from the other one to an external \"node\" and verify that the IPs are both of the above") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIP1.String(), egressIP2.String()})) framework.ExpectNoError(err, "Step 9. Check connectivity from the other one to an external \"node\" and verify that the IP is one of the egress IPs, failed, err: %v", err) ginkgo.By("10. Setting one node as unavailable for egress") @@ -891,7 +1025,8 @@ spec: statuses = verifyEgressIPStatusLengthEquals(1, nil) ginkgo.By("12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "Step 12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP, failed, err: %v", err) ginkgo.By("13. Setting the other node as unavailable for egress") @@ -901,7 +1036,8 @@ spec: statuses = verifyEgressIPStatusLengthEquals(0, nil) ginkgo.By("15. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the node IP.") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{pod1Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{pod1Node.nodeIP})) framework.ExpectNoError(err, "Step 15. 
Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the node IP, failed, err: %v", err) ginkgo.By("16. Setting one node as available for egress") @@ -911,7 +1047,8 @@ spec: statuses = verifyEgressIPStatusLengthEquals(1, nil) ginkgo.By("18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "Step 18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP, failed, err: %v", err) }, ginkgo.Entry("disabling egress nodes with egress-assignable label", &egressNodeAvailabilityHandlerViaLabel{f}), @@ -950,41 +1087,43 @@ spec: 20. Check connectivity from second pod to another node (egress2Node) secondary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted for pods unless pod is on its own egressNode) */ ginkgo.It("[OVN network] Should validate the egress IP SNAT functionality against host-networked pods", func() { - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} - ginkgo.By("0. Add the \"k8s.ovn.org/egress-assignable\" label to egress1Node node") e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") framework.Logf("Added egress-assignable label to node %s", egress1Node.name) e2enode.ExpectNodeHasLabel(context.TODO(), f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") ginkgo.By("1. By setting a secondary IP on non-egress node acting as \"another node\"") - var otherDstIP string + var otherDstIP net.IP + var err error if utilnet.IsIPv6String(egress2Node.nodeIP) { - otherDstIP = "fc00:f853:ccd:e793:ffff::1" + otherDstIP, err = ipalloc.NewPrimaryIPv6() } else { - // TODO(mk): replace with non-repeating IP allocator - otherDstIP = "172.18.1.99" + otherDstIP, err = ipalloc.NewPrimaryIPv4() } - _, err := runCommand(containerRuntime, "exec", egress2Node.name, "ip", "addr", "add", otherDstIP, "dev", "breth0") + otherDst := otherDstIP.String() + framework.Logf("Adding secondary IP %s to external bridge %s on Node %s", otherDst, deploymentconfig.Get().ExternalBridgeName(), egress2Node.name) + _, err = infraprovider.Get().ExecK8NodeCommand(egress2Node.name, []string{"ip", "addr", "add", otherDst, "dev", deploymentconfig.Get().ExternalBridgeName()}) if err != nil { framework.Failf("failed to add address to node %s: %v", egress2Node.name, err) } - defer func() { - _, err = runCommand(containerRuntime, "exec", egress2Node.name, "ip", "addr", "delete", otherDstIP, "dev", "breth0") - if err != nil { - framework.Failf("failed to remove address from node %s: %v", egress2Node.name, err) - } - }() + providerCtx.AddCleanUpFn(func() error { + _, err := infraprovider.Get().ExecK8NodeCommand(egress2Node.name, []string{"ip", "addr", "del", otherDst, "dev", deploymentconfig.Get().ExternalBridgeName()}) + return err + }) + + hostNetPort := infraprovider.Get().GetK8HostPort() otherHostNetPodIP := node{ name: egress2Node.name + "-host-net-pod", - nodeIP: otherDstIP, + nodeIP: otherDst, + port: hostNetPort, } ginkgo.By("2. 
Creating host-networked pod, on non-egress node acting as \"another node\"") hostNetPodName := egress2Node.name + "-host-net-pod" p, err := createPod(f, hostNetPodName, egress2Node.name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { p.Spec.HostNetwork = true - p.Spec.Containers[0].Image = httpdContainerImageName + p.Spec.Containers[0].Image = images.AgnHost() + p.Spec.Containers[0].Args = getAgnHostHTTPPortBindCMDArgs(hostNetPort) }) framework.ExpectNoError(err) // block until host network pod is fully deleted because subsequent tests that require binding to the same port may fail @@ -999,6 +1138,7 @@ spec: hostNetPod := node{ name: egress2Node.name + "-host-net-pod", nodeIP: egress2Node.nodeIP, + port: hostNetPort, } framework.Logf("Created pod %s on node %s", hostNetPod.name, egress2Node.name) @@ -1009,12 +1149,15 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + if utilnet.IsIPv6String(egress2Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") + framework.Logf("Selected EgressIP %s", egressIP1.String()) var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP metadata: @@ -1048,21 +1191,23 @@ spec: } ginkgo.By("5. Create one pod matching the EgressIP: running on egress1Node") - createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) + _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) framework.ExpectNoError(err, "Step 5. Create one pod matching the EgressIP: running on egress1Node, failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod1Name, pod2Node.name) ginkgo.By("6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP, failed: %v", err) ginkgo.By("7. Check connectivity from pod to another node primary IP and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(hostNetPod, podNamespace.Name, pod1Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 7. 
Check connectivity from pod to another node primary IP and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("8. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(otherHostNetPodIP, podNamespace.Name, pod1Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 8. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("9. Add the \"k8s.ovn.org/egress-assignable\" label to egress2Node") @@ -1083,25 +1228,26 @@ spec: framework.ExpectNoError(err, "Step 11. Check that the status is of length one and that it is assigned to egress2Node, failed: %v", err) ginkgo.By("12. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 12. Check connectivity from pod to an external \"node\" and verify that the srcIP is the expected egressIP, failed, err: %v", err) ginkgo.By("13. Check connectivity from pod to another node primary IP and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(hostNetPod, podNamespace.Name, pod1Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 13. Check connectivity from pod to another node and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("14. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod1Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(otherHostNetPodIP, podNamespace.Name, pod1Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 14. Check connectivity from pod to another node secondary IP and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("15. Create second pod not matching the EgressIP: running on egress1Node") - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, map[string]string{}) + _, err = createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), map[string]string{}) + framework.ExpectNoError(err, "failed to create pod %s/%s", pod2Name, f.Namespace.Name) _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod2Name) framework.ExpectNoError(err, "Step 15. 
Create second pod not matching the EgressIP: running on egress1Node, failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod2Name, pod2Node.name) ginkgo.By("16. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod2Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 16. Check connectivity from second pod to external node and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("17. Add pod selector label to make second pod egressIP managed") @@ -1110,15 +1256,15 @@ spec: updatePod(f, pod2) ginkgo.By("18. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod2Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 18. Check connectivity from second pod to external node and verify that the srcIP is the expected egressIP, failed: %v", err) ginkgo.By("19. Check connectivity from second pod to another node primary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode)") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(hostNetPod, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(hostNetPod, podNamespace.Name, pod2Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 19. Check connectivity from second pod to another node and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode), failed: %v", err) ginkgo.By("20. Check connectivity from second pod to another node secondary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode)") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(otherHostNetPodIP, pod2Name, podNamespace.Name, true, []string{egressNodeIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetHostNetworkContainerAndTest(otherHostNetPodIP, podNamespace.Name, pod2Name, true, []string{egress1Node.nodeIP})) framework.ExpectNoError(err, "Step 20. Check connectivity from second pod to another node secondary IP and verify that the srcIP is the expected nodeIP (this verifies SNAT's towards nodeIP are not deleted unless node is egressNode), failed: %v", err) }) @@ -1134,9 +1280,6 @@ spec: 7. Repeat steps 5&6 four times and swap the pod creation between node1 (nonEgressNode) and node2 (egressNode) */ ginkgo.It("Should validate the egress IP SNAT functionality for stateful-sets", func() { - - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} - ginkgo.By("0. 
Add the \"k8s.ovn.org/egress-assignable\" label to egress1Node node") e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") framework.Logf("Added egress-assignable label to node %s", egress1Node.name) @@ -1149,11 +1292,14 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1188,13 +1334,15 @@ spec: } ginkgo.By("3. Create one pod matching the EgressIP: running on egress1Node") - createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) - _, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) + _, err = createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) + + _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) framework.ExpectNoError(err, "Step 3. Create one pod matching the EgressIP: running on egress1Node, failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod1Name, pod2Node.name) ginkgo.By("4. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 4. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP, failed: %v", err) for i := 0; i < 4; i++ { @@ -1205,12 +1353,14 @@ spec: ginkgo.By("5. Delete the egressPod and recreate it immediately with the same name") _, err = e2ekubectl.RunKubectl(f.Namespace.Name, "delete", "pod", pod1Name, "--grace-period=0", "--force") framework.ExpectNoError(err, "5. Run %d: Delete the egressPod and recreate it immediately with the same name, failed: %v", i, err) - createGenericPodWithLabel(f, pod1Name, nodeSwapName, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, nodeSwapName, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) + _, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) framework.ExpectNoError(err, "5. Run %d: Delete the egressPod and recreate it immediately with the same name, failed, err: %v", i, err) framework.Logf("Created pod %s on node %s", pod1Name, nodeSwapName) ginkgo.By("6. 
Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 6. Run %d: Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP, failed: %v", i, err) } }) @@ -1247,9 +1397,6 @@ spec: if isUserDefinedNetwork(netConfigParams) { ginkgo.Skip("Unsupported for UDNs") } - - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} - ginkgo.By("0. Add the \"k8s.ovn.org/egress-assignable\" label to egress1Node node") e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") framework.Logf("Added egress-assignable label to node %s", egress1Node.name) @@ -1262,21 +1409,24 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node)") - _, err := createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + _, err := createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) framework.ExpectNoError(err, "Step 1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node), failed, err: %v", err) srcPodIP, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, podNamespace.Name, pod1Name) framework.ExpectNoError(err, "Step 1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node), failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod1Name, pod2Node.name) ginkgo.By("2. Create an EgressIP object1 with two egress IP's - egressIP1 and egressIP2 defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ - egressIP2 := dupIP(egressNodeIP) - egressIP2[len(egressIP2)-2]++ - egressIP2[len(egressIP2)-1]++ + var egressIP1, egressIP2 net.IP + var err2 error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + egressIP2, err2 = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + egressIP2, err2 = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP for egressIP1") + gomega.Expect(err2).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP for egressIP2") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1320,16 +1470,17 @@ spec: } ginkgo.By("4. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{assignedEIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{assignedEIP})) framework.ExpectNoError(err, "Step 4.
Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1, failed: %v", err) ginkgo.By("5. Create an EgressIP object2 with one egress IP3 defined (standby egressIP)") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressIP3 := dupIP(egressNodeIP) - egressIP3[len(egressIP3)-2]++ - egressIP3[len(egressIP3)-1]++ - egressIP3[len(egressIP3)-1]++ + var egressIP3 net.IP + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP3, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP3, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig2 = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1380,10 +1531,11 @@ spec: framework.ExpectNoError(err, "Step 6. Check that the second egressIP object is assigned to node2 (pod2Node/egress1Node), failed: %v", err) ginkgo.By("7. Check the OVN DB to ensure no SNATs are added for the standby egressIP") - dbPods, err := e2ekubectl.RunKubectl(ovnNamespace, "get", "pods", "-l", "name=ovnkube-db", "-o=jsonpath='{.items..metadata.name}'") + ovnKubernetesNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + dbPods, err := e2ekubectl.RunKubectl(ovnKubernetesNamespace, "get", "pods", "-l", "name=ovnkube-db", "-o=jsonpath='{.items..metadata.name}'") dbContainerName := "nb-ovsdb" if isInterconnectEnabled() { - dbPods, err = e2ekubectl.RunKubectl(ovnNamespace, "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress1Node.name), "-o=jsonpath='{.items..metadata.name}'") + dbPods, err = e2ekubectl.RunKubectl(ovnKubernetesNamespace, "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress1Node.name), "-o=jsonpath='{.items..metadata.name}'") } if err != nil || len(dbPods) == 0 { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } @@ -1398,7 +1550,7 @@ spec: if isIPv6TestRun { logicalIP = fmt.Sprintf("logical_ip=\"%s\"", srcPodIP.String()) } - snats, err := e2ekubectl.RunKubectl(ovnNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err := e2ekubectl.RunKubectl(ovnKubernetesNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } @@ -1407,7 +1559,7 @@ spec: } ginkgo.By("8. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{assignedEIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{assignedEIP})) framework.ExpectNoError(err, "Step 8. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1, failed: %v", err) ginkgo.By("9. Delete assigned egressIP1 from egressIP object1") @@ -1458,11 +1610,11 @@ spec: framework.ExpectNoError(err, "Step 10.
Check that the status is of length one and that standby egressIP3 of egressIP object2 is assigned to node2 (pod2Node/egress1Node), failed: %v", err) ginkgo.By("11. Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP3 from object2") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP3.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP3.String()})) framework.ExpectNoError(err, "Step 11. Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP3 from object2, failed: %v", err) ginkgo.By("12. Check the OVN DB to ensure SNATs are added for only the standby egressIP") - snats, err = e2ekubectl.RunKubectl(ovnNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = e2ekubectl.RunKubectl(ovnKubernetesNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for only the standby egressIP, err: %v", err) } @@ -1497,7 +1649,7 @@ spec: framework.ExpectNoError(err, "Step 14. Ensure egressIP1 from egressIP object1 and egressIP3 from object2 is correctly transferred to egress2Node, failed: %v", err) if isInterconnectEnabled() { - dbPods, err = e2ekubectl.RunKubectl(ovnNamespace, "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress2Node.name), "-o=jsonpath='{.items..metadata.name}'") + dbPods, err = e2ekubectl.RunKubectl(ovnKubernetesNamespace, "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress2Node.name), "-o=jsonpath='{.items..metadata.name}'") } if err != nil || len(dbPods) == 0 { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) @@ -1510,7 +1662,7 @@ spec: } ginkgo.By("15. Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3") - snats, err = e2ekubectl.RunKubectl(ovnNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = e2ekubectl.RunKubectl(ovnKubernetesNamespace, "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3, err: %v", err) } @@ -1531,21 +1683,21 @@ spec: } ginkgo.By("16. Check connectivity from pod to an external container and verify that the srcIP is either egressIP1 or egressIP3") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{assignedEIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{assignedEIP})) framework.ExpectNoError(err, "Step 16. Check connectivity from pod to an external container and verify that the srcIP is either egressIP1 or egressIP3, failed: %v", err) ginkgo.By("17. 
Delete EgressIP object that was serving the pod before in Step 16") e2ekubectl.RunKubectlOrDie("default", "delete", "eip", toDelete) ginkgo.By("18. Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{unassignedEIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{unassignedEIP})) framework.ExpectNoError(err, "Step 18. Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP, failed: %v", err) ginkgo.By("19. Delete the remaining egressIP object") e2ekubectl.RunKubectlOrDie("default", "delete", "eip", toKeepEIP) ginkgo.By("20. Check connectivity from pod to an external container and verify that the srcIP is the expected nodeIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "Step 20. Check connectivity from pod to an external container and verify that the srcIP is the expected nodeIP, failed: %v", err) }) @@ -1586,11 +1738,14 @@ spec: e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress2Node.name, "k8s.ovn.org/egress-assignable", "dummy") ginkgo.By("1. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") podNamespace := f.Namespace labels := map[string]string{ @@ -1629,11 +1784,11 @@ spec: node1 := statuses[0].Node ginkgo.By("3. Create one pod matching the EgressIP") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) ginkgo.By(fmt.Sprintf("4. Make egress node: %s unreachable", node1)) setNodeReachable(node1, false) - otherNode := egress1Node.name if node1 == egress1Node.name { otherNode = egress2Node.name @@ -1646,7 +1801,7 @@ spec: }) ginkgo.By("6. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP") - err := wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "6. 
Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err) ginkgo.By("7. Check connectivity from pod to the api-server (running hostNetwork:true) and verifying that the connection is achieved") @@ -1665,7 +1820,7 @@ spec: verifyEgressIPStatusLengthEquals(0, nil) ginkgo.By("10. Check connectivity from pod to an external \"node\" and verify that the IP is the node IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{pod1Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{pod1Node.nodeIP})) framework.ExpectNoError(err, "10. Check connectivity from pod to an external \"node\" and verify that the IP is the node IP, failed, err: %v", err) ginkgo.By("11. Make node 1 reachable again") @@ -1678,7 +1833,7 @@ spec: }) ginkgo.By("13. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "13. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err) ginkgo.By("14. Make node 2 reachable again") @@ -1691,7 +1846,7 @@ spec: }) ginkgo.By("17. Make node 1 NotReady") - setNodeReady(node1, false) + setNodeReady(providerCtx, node1, false) ginkgo.By("18. Check that egress IP is assigned to node 2") statuses = verifyEgressIPStatusLengthEquals(1, func(statuses []egressIPStatus) bool { @@ -1700,7 +1855,7 @@ spec: }) ginkgo.By("19. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "19. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err) ginkgo.By("20. Make node 1 not reachable") @@ -1713,7 +1868,7 @@ spec: verifyEgressIPStatusLengthEquals(0, nil) ginkgo.By("23. Make node 1 Ready") - setNodeReady(node1, true) + setNodeReady(providerCtx, node1, true) ginkgo.By("24. Check that egress IP is un-assigned (since node 1 is unreachable)") verifyEgressIPStatusLengthEquals(0, nil) @@ -1728,7 +1883,7 @@ spec: }) ginkgo.By("27. Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "27. 
Check connectivity from pod to an external \"node\" and verify that the IP is the egress IP, failed, err: %v", err) }) @@ -1752,7 +1907,6 @@ spec: if isUserDefinedNetwork(netConfigParams) { ginkgo.Skip("Unsupported for UDNs") } - command := []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", podHTTPPort)} ginkgo.By("0. Add the \"k8s.ovn.org/egress-assignable\" label to one nodes") e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") @@ -1764,11 +1918,14 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP := dupIP(egressNodeIP) - egressIP[len(egressIP)-2]++ + var egressIP net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = `apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1801,11 +1958,12 @@ spec: ginkgo.By("2. Create an EgressFirewall object with one allow rule and one \"block-all\" rule defined") var firewallAllowNode, firewallDenyAll string - if utilnet.IsIPv6String(targetNode.nodeIP) { - firewallAllowNode = targetNode.nodeIP + "/128" + + if isIPv6TestRun { + firewallAllowNode = primaryTargetExternalContainer.GetIPv6() + "/128" firewallDenyAll = "::/0" } else { - firewallAllowNode = targetNode.nodeIP + "/32" + firewallAllowNode = primaryTargetExternalContainer.GetIPv4() + "/32" firewallDenyAll = "0.0.0.0/0" } @@ -1837,9 +1995,11 @@ spec: e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", egressFirewallYaml) ginkgo.By("3. Create two pods, and matching service, matching both egress firewall and egress IP") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) - serviceIP, err := createServiceForPodsWithLabel(f, f.Namespace.Name, servicePort, podHTTPPort, "ClusterIP", podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod1Name) + _, err = createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, pod2Name) + serviceIP, err := createServiceForPodsWithLabel(f, f.Namespace.Name, clusterNetworkHTTPPort, clusterNetworkHTTPPort, "ClusterIP", podEgressLabel) framework.ExpectNoError(err, "Step 3. Create two pods, and matching service, matching both egress firewall and egress IP, failed creating service, err: %v", err) for _, podName := range []string{pod1Name, pod2Name} { _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, podName) @@ -1850,11 +2010,11 @@ spec: verifyEgressIPStatusLengthEquals(1, nil) ginkgo.By("4. 
Check connectivity to the blocked IP and verify that it fails") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(deniedTargetNode, pod1Name, podNamespace.Name, false, []string{egressIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryDeniedExternalContainer, podNamespace.Name, pod1Name, false, []string{egressIP.String()})) framework.ExpectNoError(err, "Step: 4. Check connectivity to the blocked IP and verify that it fails, failed, err: %v", err) ginkgo.By("5. Check connectivity to the allowed IP and verify it has the egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIP.String()})) framework.ExpectNoError(err, "Step: 5. Check connectivity to the allowed IP and verify it has the egress IP, failed, err: %v", err) // TODO: in the future once we only have shared gateway mode: implement egress firewall so that @@ -1868,11 +2028,11 @@ spec: ginkgo.By("7. Check connectivity to the other pod IP and verify that it works") pod2IP, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod2Name) framework.ExpectNoError(err, "Step 7. Check connectivity to the other pod IP and verify that it works, err retrieving pod %s IP: %v", err, pod2Name) - err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String())) + err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String(), clusterNetworkHTTPPort)) framework.ExpectNoError(err, "Step 7. Check connectivity to the other pod IP and verify that it works, err: %v", err) ginkgo.By("8. Check connectivity to the service IP and verify that it works") - servicePortAsString := strconv.Itoa(int(servicePort)) + servicePortAsString := strconv.Itoa(int(clusterNetworkHTTPPort)) err = wait.PollImmediate(retryInterval, retryTimeout, targetDestinationAndTest(podNamespace.Name, fmt.Sprintf("http://%s/hostname", net.JoinHostPort(serviceIP, servicePortAsString)), []string{pod1Name, pod2Name})) framework.ExpectNoError(err, "8. Check connectivity to the service IP and verify that it works, failed, err %v", err) }) @@ -1908,11 +2068,14 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("Creating an EgressIP object with one egress IPs defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. 
- egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1949,31 +2112,32 @@ spec: ginkgo.By("Creating a client pod labeled to use the EgressIP running on a non egress node") command := []string{"/agnhost", "pause"} - _, err := createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) framework.ExpectNoError(err, "can't create a client pod: %v", err) - ginkgo.By("Creating an external kind container as server to send the traffic to/from") - externalKindContainerName := "kind-external-container-for-egressip-mtu-test" - serverPodPort := rand.Intn(echoServerPodPortMax-echoServerPodPortMin) + echoServerPodPortMin - - deleteClusterExternalContainer(targetNode.name) - targetNode.name = externalKindContainerName - externalKindIPv4, _ := createClusterExternalContainer( - externalKindContainerName, - agnhostImage, - []string{"--privileged", "--network", "kind"}, - []string{"pause"}, - ) + ginkgo.By("Creating an external container (outside k8 cluster) as server to send the traffic to/from") + externalContainerPrimaryPort := infraprovider.Get().GetExternalContainerPort() + // Then create and run the server + httpPort := fmt.Sprintf("--http-port=%d", externalContainerPrimaryPort) + udpPort := fmt.Sprintf("--udp-port=%d", externalContainerPrimaryPort) + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get providers primary network") + externalContainerPrimary := infraapi.ExternalContainer{Name: "external-container-for-egressip-mtu-test", Image: images.AgnHost(), + Network: providerPrimaryNetwork, Args: []string{"pause"}, ExtPort: externalContainerPrimaryPort} + externalContainerPrimary, err = providerCtx.CreateExternalContainer(externalContainerPrimary) + framework.ExpectNoError(err, "failed to create external container: %s", externalContainerPrimary.String()) // First disable PMTUD - _, err = runCommand(containerRuntime, "exec", externalKindContainerName, "sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=2") + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=2"}) framework.ExpectNoError(err, "disabling PMTUD in the external kind container failed: %v", err) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=0"}) + return err + }) - // Then run the server - httpPort := fmt.Sprintf("--http-port=%d", serverPodPort) - udpPort := fmt.Sprintf("--udp-port=%d", serverPodPort) - _, err = runCommand(containerRuntime, "exec", "-d", externalKindContainerName, "/agnhost", "netexec", httpPort, udpPort) - framework.ExpectNoError(err, "running netexec server in the external kind container failed: %v", err) + go func() { + _, _ = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"/agnhost", "netexec", httpPort, udpPort}) + }() ginkgo.By("Checking connectivity to the external 
kind container and verify that the source IP is the egress IP") var curlErr error @@ -1983,7 +2147,7 @@ spec: retryTimeout, true, func(ctx context.Context) (bool, error) { - curlErr := curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalKindIPv4, serverPodPort) + curlErr := curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) return curlErr == nil, nil }, ) @@ -1998,10 +2162,10 @@ spec: // back to OVN reaching the client pod. ginkgo.By("Making the external kind container reply an oversized UDP packet and checking that it is recieved") payload := fmt.Sprintf("%01420d", 1) - cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", + cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %s", payload, - externalKindIPv4, - serverPodPort, + externalContainerPrimary.GetIPv4(), + externalContainerPrimary.GetPortStr(), ) stdout, err := e2epodoutput.RunHostCmd( podNamespace.Name, @@ -2010,11 +2174,12 @@ spec: framework.ExpectNoError(err, "sending echo request to external kind container failed: %v", err) if stdout != payload { - framework.Failf("external kind container did not reply with the requested payload") + framework.Failf("external kind container did not reply with the requested payload.\nstdout: %q\n\npayload: %q\nmust be equal", + stdout, payload) } ginkgo.By("Checking that there is no IP route exception and thus reply was fragmented") - stdout, err = runCommand(containerRuntime, "exec", externalKindContainerName, "ip", "route", "get", egressIP1.String()) + stdout, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"ip", "route", "get", egressIP1.String()}) framework.ExpectNoError(err, "listing the server IP route cache failed: %v", err) if regexp.MustCompile(`cache expires.*mtu.*`).Match([]byte(stdout)) { @@ -2124,8 +2289,8 @@ spec: gomega.Expect(verifyEgressIPStatusContainsIPs(statuses, []string{egressIPIP1, egressIPIP2})).Should(gomega.BeTrue()) ginkgo.By("4. Create two pods matching the EgressIP: one running on each of the egress nodes") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) for _, podName := range []string{pod1Name, pod2Name} { _, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, podName) framework.ExpectNoError(err, "Step 4. Create two pods matching an EgressIP - running pod(s) failed to get "+ @@ -2136,13 +2301,13 @@ spec: ginkgo.By("5. Check connectivity from both pods to an external \"node\" hosted on the secondary host network " + "and verify the expected IPs") - err := wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{egressIPIP1, egressIPIP2})) + err := wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIPIP1, egressIPIP2})) framework.ExpectNoError(err, "Step 5. 
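Two assertions above make the MTU scenario concrete: the 1420-byte UDP echo must come back intact (so the reply was fragmented on the way to the pod), and `ip route get` on the external server must show no cached per-route MTU exception, because PMTUD was disabled there beforehand. A compressed standalone sketch of both checks, standard library only, with placeholder addresses:

    package main

    import (
    	"fmt"
    	"regexp"
    )

    func main() {
    	// 1420 zero-padded digits, intentionally larger than the expected path MTU.
    	payload := fmt.Sprintf("%01420d", 1)

    	// Shell pipeline the test runs inside the client pod; agnhost's "echo"
    	// handler sends the same payload back over UDP.
    	echoCmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", payload, "172.18.0.100", 12345)
    	fmt.Println("client pod command:", echoCmd[:60], "...")

    	// Example `ip route get <egress IP>` output on the server: if a cached
    	// line like the one below appears, a PMTU exception was installed and
    	// the reply was not fragmented, which the test treats as a failure.
    	routeGet := "172.18.0.2 dev eth0 src 172.18.0.100 uid 0\n    cache expires 599sec mtu 1400"
    	if regexp.MustCompile(`cache expires.*mtu.*`).MatchString(routeGet) {
    		fmt.Println("unexpected IP route cache MTU exception")
    	}
    }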
Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is a secondary host network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIPIP1, egressIPIP2})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPIP1, egressIPIP2})) framework.ExpectNoError(err, "Step 5. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is a secondary host network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod2Name, err) @@ -2150,7 +2315,7 @@ spec: pod2IP, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod2Name) framework.ExpectNoError(err, "Step 6. Check connectivity from one pod to the other and verify that the connection "+ "is achieved, failed for pod %s, err: %v", pod2Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String())) + err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String(), clusterNetworkHTTPPort)) framework.ExpectNoError(err, "Step 6. Check connectivity from one pod to the other and verify that the connection "+ "is achieved, failed, err: %v", err) @@ -2173,8 +2338,8 @@ spec: ginkgo.By("9. Check connectivity from pod that isn't selected by EgressIP anymore to an external \"node\" on " + "the OVN network and verify that the IP is the node IP.") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "Step 9. Check connectivity from that one to an external \"node\" on the OVN "+ "network and verify that the IP is the node IP failed: %v", err) @@ -2185,11 +2350,11 @@ spec: ginkgo.By("11. Check connectivity from both pods to an external \"node\" hosted on the secondary host network " + "and verify the expected IPs") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIPIP1, egressIPIP2})) framework.ExpectNoError(err, "Step 11. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is a secondary host network and verify that the src IP is the expected egressIP, failed, err: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, podNamespace.Name, pod2Name, true, []string{egressIPIP1, egressIPIP2})) framework.ExpectNoError(err, "Step 11. 
Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is a secondary host network and verify that the src IP is the expected egressIP, failed, err: %v", podNamespace.Name, pod2Name, err) @@ -2205,8 +2370,8 @@ spec: ginkgo.By("15. Check connectivity from a pod to an external \"node\" on the secondary host network and " + "verify that the IP is the remaining egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "15. Check connectivity from a pod to an external \"node\" on the secondary host network"+ " network and verify that the IP is the remaining egress IP, failed, err: %v", err) @@ -2215,13 +2380,13 @@ spec: ginkgo.By("17. Check connectivity from a pod to an external \"node\" on the OVN network and " + "verify that the IP is the node IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{pod1Node.nodeIP})) framework.ExpectNoError(err, "17. Check connectivity from a pod to an external \"node\" on the OVN network "+ "and verify that the IP is the node IP for pod %s/%s and egress-ing from node %s with node IP %s: %v", podNamespace.Name, pod1Name, pod1Node.name, pod1Node.nodeIP, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "17. Check connectivity from a pod to an external \"node\" on the OVN network "+ "and verify that the IP is the node IP for pod %s/%s and egress-ing from node %s with node IP %s: %v", podNamespace.Name, pod2Name, pod2Node.name, pod2Node.nodeIP, err) @@ -2240,8 +2405,8 @@ spec: ginkgo.By("22. Check connectivity from a pod to an external \"node\" on the secondary host network and verify " + "that the IP is the remaining egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "22. Check connectivity from a pod (%s/%s) to an external \"node\" on the secondary host network and verify "+ "that the IP is the remaining egress IP, failed, err: %v", podNamespace.Name, pod1Name, err) @@ -2255,8 +2420,8 @@ spec: gomega.Expect(verifyEgressIPStatusContainsIPs(statuses, []string{egressIPIP1, egressIPIP2})).Should(gomega.BeTrue()) ginkgo.By("26. 
Check connectivity from the other pod to an external \"node\" on the secondary host network and verify the expected IPs") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIPIP1, egressIPIP2})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPIP1, egressIPIP2})) framework.ExpectNoError(err, "26. Check connectivity from the other pod (%s/%s) to an external \"node\" on the "+ "secondary host network and verify the expected IPs, failed, err: %v", podNamespace, pod2Name, err) }, table.Entry("IPv4", "10.10.10.100", "10.10.10.200"), @@ -2319,11 +2484,14 @@ spec: updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with two egress IPs - one hosted by an OVN network and one by a secondary host network") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP := dupIP(egressNodeIP) - egressIP[len(egressIP)-2]++ + var egressIP net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") egressIPOVN := egressIP.String() egressIPSecondaryHost := "10.10.10.200" egressIPConfig := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 @@ -2364,8 +2532,8 @@ spec: gomega.Expect(verifyEgressIPStatusContainsIPs(statuses, []string{egressIPOVN, egressIPSecondaryHost})).Should(gomega.BeTrue()) ginkgo.By("4. Create two pods matching the EgressIP: one running on each of the egress nodes") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) + createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) for _, podName := range []string{pod1Name, pod2Name} { _, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, podName) framework.ExpectNoError(err, "Step 4. Create two pods matching an EgressIP - running pod(s) failed to get "+ @@ -2376,25 +2544,25 @@ spec: ginkgo.By("5. Check connectivity a pod to an external \"node\" hosted on the OVN network " + "and verify the expected IP") - err := wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, - podNamespace.Name, true, []string{egressIPOVN})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 5. 
Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{egressIPOVN})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 5. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod2Name, err) ginkgo.By("6. Check connectivity a pod to an external \"node\" hosted on a secondary host network " + "and verify the expected IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{egressIPSecondaryHost})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is secondary host network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIPSecondaryHost})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is secondary host network and verify that the src IP is the expected egressIP, failed: %v", podNamespace.Name, pod2Name, err) @@ -2403,7 +2571,7 @@ spec: pod2IP, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, podNamespace.Name, pod2Name) framework.ExpectNoError(err, "Step 7. Check connectivity from one pod to the other and verify that the connection "+ "is achieved, failed to get Pod %s IP(s), err: %v", pod2Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String())) + err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP.String(), clusterNetworkHTTPPort)) framework.ExpectNoError(err, "Step 7. Check connectivity from one pod to the other and verify that the connection "+ "is achieved, failed, err: %v", err) @@ -2426,8 +2594,8 @@ spec: ginkgo.By("10. Check connectivity from pod that isn't selected by EgressIP anymore to an external \"node\" on " + "the OVN network and verify that the IP is the node IP.") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "Step 10. 
Check connectivity from that one to an external \"node\" on the OVN "+ "network and verify that the IP is the node IP failed: %v", err) @@ -2438,22 +2606,22 @@ spec: ginkgo.By("12. Check connectivity from both pods to an external \"node\" hosted on the OVN network " + "and verify that the expected IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 12. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, podNamespace.Name, pod2Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 12. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod2Name, err) ginkgo.By("13. Check connectivity from both pods to an external \"node\" hosted on secondary host network " + "and verify that the expected IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, podNamespace.Name, pod1Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 13. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that isn't OVN network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, podNamespace.Name, + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, podNamespace.Name, pod2Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 13. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that isn't OVN network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod2Name, err) @@ -2479,12 +2647,12 @@ spec: ginkgo.By("17. Check connectivity from both pods to an external \"node\" on the secondary host network and " + "verify that the src IP is the expected egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "17. 
Check connectivity from both pods (%s/%s) to an external \"node\" on the secondary host"+ " network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{statuses[0].EgressIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{statuses[0].EgressIP})) framework.ExpectNoError(err, "17. Check connectivity from both pods (%s/%s) to an external \"node\" on the secondary host network"+ " network and verify that the src IP is the expected egress IP, failed, err: %v", podNamespace.Name, pod2Name, err) @@ -2495,12 +2663,12 @@ spec: statuses = verifyEgressIPStatusLengthEquals(0, nil) ginkgo.By("20. Check connectivity from both pods to an external \"node\" on the OVN network and verify that the src IP is the node IPs") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, - podNamespace.Name, true, []string{pod1Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{pod1Node.nodeIP})) framework.ExpectNoError(err, "20. Check connectivity from both pods (%s/%s) to an external \"node\" on the "+ "OVN network and verify that the src IP is the node IP %s, failed: %v", podNamespace, pod1Name, pod1Node.nodeIP, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{pod2Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "20. Check connectivity from both pods (%s/%s) to an external \"node\" on the "+ "OVN network and verify that the src IP is the node IP %s, failed: %v", podNamespace, pod2Name, pod2Node.nodeIP, err) @@ -2523,22 +2691,22 @@ spec: gomega.Expect(verifyEgressIPStatusContainsIPs(statuses, []string{egressIPOVN, egressIPSecondaryHost})).Should(gomega.BeTrue()) ginkgo.By("27. Check connectivity from both pods to an external \"node\" on the OVN network and verify the src IP is the expected egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, - podNamespace.Name, true, []string{egressIPOVN})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 27. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egress IP, failed: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, - podNamespace.Name, true, []string{egressIPOVN})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPOVN})) framework.ExpectNoError(err, "Step 27. 
Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is OVN network and verify that the src IP is the expected egress IP, failed: %v", podNamespace.Name, pod2Name, err) ginkgo.By("28. Check connectivity both pods to an external \"node\" on the secondary host network and verify the src IP is the expected egress IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{egressIPSecondaryHost})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 28. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is secondary host network and verify that the src IP is the expected egress IP, failed: %v", podNamespace.Name, pod1Name, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIPSecondaryHost})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIPSecondaryHost})) framework.ExpectNoError(err, "Step 28. Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is secondary host network and verify that the src IP is the expected egress IP, failed: %v", podNamespace.Name, pod2Name, err) @@ -2626,11 +2794,11 @@ spec: verifySpecificEgressIPStatusLengthEquals(egressIPName2, 1, nil) ginkgo.By("3. Create two pods - one matching each EgressIP") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + _, err := createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) podEgressLabel2 := map[string]string{ "wants": "egress2", } - createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel2) + createGenericPodWithLabel(f, pod2Name, pod2Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel2) for _, podName := range []string{pod1Name, pod2Name} { _, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, podNamespace.Name, podName) framework.ExpectNoError(err, "Step 3. Create two pods - one matching each EgressIP, failed for pod %s, err: %v", podName, err) @@ -2638,12 +2806,12 @@ spec: ginkgo.By("4. Check connectivity from both pods to an external \"node\" hosted on a secondary host network " + "and verify the expected IPs") - err := wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{egressIP1})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIP1})) framework.ExpectNoError(err, "4. 
Check connectivity from both pods to an external \"node\" hosted on a secondary host network "+ "and verify the expected IPs, failed for EgressIP %s: %v", egressIPName, err) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIP2})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIP2})) framework.ExpectNoError(err, "4. Check connectivity from both pods to an external \"node\" hosted on a secondary host network "+ "and verify the expected IPs, failed for EgressIP %s: %v", egressIPName2, err) @@ -2651,14 +2819,14 @@ spec: e2ekubectl.RunKubectlOrDie("default", "delete", "eip", egressIPName, "--ignore-not-found=true") ginkgo.By("6. Check connectivity to the host on the secondary host network from the pod selected by the other EgressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod2Name, - podNamespace.Name, true, []string{egressIP2})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod2Name, true, []string{egressIP2})) framework.ExpectNoError(err, "6. Check connectivity to the host on the secondary host network from the pod "+ "selected by the other EgressIP, failed: %v", err) ginkgo.By("7. Check connectivity to the host on the OVN network from the pod not selected by EgressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, - podNamespace.Name, true, []string{pod1Node.nodeIP})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{pod1Node.nodeIP})) framework.ExpectNoError(err, "7. Check connectivity to the host on the OVN network from the pod not selected by EgressIP, failed: %v", err) }) @@ -2687,7 +2855,7 @@ spec: vrfName := "egress-vrf" vrfRoutingTable := "99999" // find the egress interface name - out, err := runCommand(containerRuntime, "exec", egress1Node.name, "ip", "-o", "route", "get", egressIP1) + out, err := infraprovider.Get().ExecK8NodeCommand(egress1Node.name, []string{"ip", "-o", "route", "get", egressIP1}) if err != nil { framework.Failf("failed to add expected EIP assigned interface, err %v, out: %s", err, out) } @@ -2704,42 +2872,37 @@ spec: } // Enslaving a link to a VRF device may cause the removal of the non link local IPv6 address from the interface // Look up the IP address, add it after enslaving the link and perform test. 
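The comment above explains the extra bookkeeping in the hunk that follows: enslaving the egress interface to a VRF can drop its global IPv6 address, so the test records it first and re-adds it afterwards. A standalone sketch of the same iproute2 sequence via os/exec (interface name, VRF table and address are placeholders; root privileges assumed):

    package main

    import (
    	"fmt"
    	"os/exec"
    )

    func run(args ...string) error {
    	out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
    	if err != nil {
    		return fmt.Errorf("%v: %s", err, out)
    	}
    	return nil
    }

    func main() {
    	const (
    		iface  = "eth1"                  // placeholder egress interface
    		vrf    = "egress-vrf"            // same VRF name the test uses
    		table  = "99999"                 // same routing table the test uses
    		ipv6   = "fc00:f853:ccd:e793::4" // global address recorded before enslaving
    		prefix = "64"
    	)
    	if err := run("ip", "link", "add", vrf, "type", "vrf", "table", table); err != nil {
    		fmt.Println("create VRF failed:", err)
    		return
    	}
    	// Counterpart of the providerCtx.AddCleanUpFn below: always remove the VRF.
    	defer func() { _ = run("ip", "link", "del", vrf) }()

    	if err := run("ip", "link", "set", "dev", iface, "master", vrf); err != nil {
    		fmt.Println("enslave failed:", err)
    		return
    	}
    	// Restore the global IPv6 address that enslaving may have removed;
    	// "nodad" skips duplicate address detection, as in the test.
    	if err := run("ip", "-6", "address", "add", ipv6+"/"+prefix, "dev", iface, "nodad", "scope", "global"); err != nil {
    		fmt.Println("restore IPv6 address failed:", err)
    	}
    }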
+ networks, err := providerCtx.GetAttachedNetworks() + _, exists := networks.Get(secondaryNetworkName) + gomega.Expect(exists).Should(gomega.BeTrue(), "network %s must exist", secondaryNetworkName) + secondaryNetwork, _ := networks.Get(secondaryNetworkName) restoreLinkIPv6AddrFn := func() error { return nil } if isV6Node { ginkgo.By("attempting to find IPv6 global address for secondary network") - address, err := runCommand(containerRuntime, "inspect", "-f", - fmt.Sprintf("'{{ (index .NetworkSettings.Networks \"%s\").GlobalIPv6Address }}'", secondaryNetworkName), egress1Node.name) - if err != nil { - framework.Failf("failed to get node %s IP address for network %s: %v", egress1Node.name, secondaryNetworkName, err) - } - address = strings.TrimSuffix(address, "\n") - address = strings.Trim(address, "'") - ginkgo.By(fmt.Sprintf("found address %q", address)) - gomega.Expect(net.ParseIP(address)).ShouldNot(gomega.BeNil(), "IPv6 address for secondary network must be present") - prefix, err := runCommand(containerRuntime, "inspect", "-f", - fmt.Sprintf("'{{ (index .NetworkSettings.Networks \"%s\").GlobalIPv6PrefixLen }}'", secondaryNetworkName), egress1Node.name) - if err != nil { - framework.Failf("failed to get node %s IP prefix length for network %s: %v", egress1Node.name, secondaryNetworkName, err) - } - prefix = strings.TrimSuffix(prefix, "\n") - prefix = strings.Trim(prefix, "'") - _, err = strconv.Atoi(prefix) + inf, err := infraprovider.Get().GetK8NodeNetworkInterface(egress1Node.name, secondaryNetwork) + framework.ExpectNoError(err, "failed to get network interface for network %s on instance %s", secondaryNetwork.Name(), egress1Node.name) + gomega.Expect(net.ParseIP(inf.IPv6)).ShouldNot(gomega.BeNil(), "IPv6 address for secondary network must be present") + _, err = strconv.Atoi(inf.IPv6Prefix) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "requires valid IPv6 address prefix") restoreLinkIPv6AddrFn = func() error { - _, err := runCommand(containerRuntime, "exec", egress1Node.name, "ip", "-6", "address", "add", - fmt.Sprintf("%s/%s", address, prefix), "dev", egressInterface, "nodad", "scope", "global") + _, err := infraprovider.Get().ExecK8NodeCommand(egress1Node.name, []string{ + "ip", "-6", "address", "add", + fmt.Sprintf("%s/%s", inf.IPv6, inf.IPv6Prefix), "dev", egressInterface, "nodad", "scope", "global", + }) return err } } - _, err = runCommand(containerRuntime, "exec", egress1Node.name, "ip", "link", "add", vrfName, "type", "vrf", "table", vrfRoutingTable) - if err != nil { - framework.Failf("failed to add VRF to node %s: %v", egress1Node.name, err) - } - defer runCommand(containerRuntime, "exec", egress1Node.name, "ip", "link", "del", vrfName) - _, err = runCommand(containerRuntime, "exec", egress1Node.name, "ip", "link", "set", "dev", egressInterface, "master", vrfName) - if err != nil { - framework.Failf("failed to enslave interface %s to VRF %s node %s: %v", egressInterface, vrfName, egress1Node.name, err) - } + _, err = infraprovider.Get().ExecK8NodeCommand(egress1Node.name, []string{"ip", "link", "add", vrfName, "type", "vrf", "table", vrfRoutingTable}) + framework.ExpectNoError(err, "failed to add VRF to node %s: %v", egress1Node.name) + providerCtx.AddCleanUpFn(func() error { + _, err := infraprovider.Get().ExecK8NodeCommand(egress1Node.name, []string{ + "ip", "link", "del", vrfName, + }) + return err + }) + _, err = infraprovider.Get().ExecK8NodeCommand(egress1Node.name, []string{"ip", "link", "set", "dev", egressInterface, "master", vrfName}) + 
framework.ExpectNoError(err, "failed to enslave interface %s to VRF %s node %s", egressInterface, vrfName, egress1Node.name) + if isV6Node { gomega.Expect(restoreLinkIPv6AddrFn()).Should(gomega.Succeed(), "restoring IPv6 address should succeed") } @@ -2781,13 +2944,13 @@ spec: ginkgo.By("3. Check that status of EgressIP object is of length one") verifySpecificEgressIPStatusLengthEquals(egressIPName, 1, nil) ginkgo.By("4. Create a pod matching the EgressIP") - createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) framework.ExpectNoError(err, "Step 4. Create a pod matching the EgressIP, failed, err: %v", err) ginkgo.By("5. Check connectivity from a pod to an external \"node\" hosted on a secondary host network " + "and verify the expected IP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, pod1Name, - podNamespace.Name, true, []string{egressIP1})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(secondaryTargetExternalContainer, + podNamespace.Name, pod1Name, true, []string{egressIP1})) framework.ExpectNoError(err, "5. Check connectivity a pod to an external \"node\" hosted on a secondary host network "+ "and verify the expected IP, failed for EgressIP %s: %v", egressIPName, err) }) @@ -2831,11 +2994,13 @@ spec: updateNamespaceLabels(f, otherNetworkNamespace, selectedByEIPLabels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. - egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -2870,9 +3035,9 @@ spec: } ginkgo.By("5. Create two pods matching the EgressIP with each connected to the same network") - pod1, err := createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + pod1, err := createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) framework.ExpectNoError(err, "5. Create one pod matching the EgressIP: running on egress1Node, failed: %v", err) - pod2, err := createGenericPodWithLabel(f, pod2Name, pod2Node.name, otherNetworkNamespace.Name, command, podEgressLabel) + pod2, err := createGenericPodWithLabel(f, pod2Name, pod2Node.name, otherNetworkNamespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) framework.ExpectNoError(err, "5. Create one pod matching the EgressIP: running on egress2Node, failed: %v", err) gomega.Expect(pod.WaitForPodRunningInNamespace(context.TODO(), f.ClientSet, pod1)).Should(gomega.Succeed()) @@ -2884,25 +3049,25 @@ spec: framework.Logf("Created pod %s on node %s", pod2Name, pod2Node.name) ginkgo.By("6. 
Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, pod1Namespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod1Namespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP, failed: %v", err) ginkgo.By("7. Check connectivity from pod connected to the same network and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, pod2OtherNetworkNamespace, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod2OtherNetworkNamespace, pod2Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 7. Check connectivity from pod connected to the same network and verify that the srcIP is the expected nodeIP, failed: %v", err) ginkgo.By("8. Delete pod in one namespace") framework.ExpectNoError(pod.DeletePodWithWait(context.TODO(), f.ClientSet, pod1), "pod %s/%s deletion failed", pod1.Namespace, pod1.Name) ginkgo.By("9. Check connectivity from other pod and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, pod2OtherNetworkNamespace, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod2OtherNetworkNamespace, pod2Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 9. Check connectivity from other pod and verify that the srcIP is the expected egressIP, failed: %v", err) ginkgo.By("10. Delete namespace with zero pods") gomega.Expect(f.ClientSet.CoreV1().Namespaces().Delete(context.TODO(), pod1.Namespace, metav1.DeleteOptions{})).To(gomega.Succeed()) ginkgo.By("11. Check connectivity from other pod and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, pod2OtherNetworkNamespace, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod2OtherNetworkNamespace, pod2Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 11. Check connectivity from other pod and verify that the srcIP is the expected egressIP and verify that the srcIP is the expected nodeIP, failed: %v", err) }) @@ -2964,11 +3129,13 @@ spec: updateNamespaceLabels(f, otherNetworkNamespace, selectedByEIPLabels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") - // Assign the egress IP without conflicting with any node IP, - // the kind subnet is /16 or /64 so the following should be fine. 
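The removed comment above describes the retired scheme that also disappears in the other EgressIP hunks of this file: derive an egress IP by copying a node IP and bumping its second-to-last byte, which stays inside kind's /16 (IPv4) or /64 (IPv6) node subnet. The patch replaces it with the ipalloc.NewPrimaryIPv4/IPv6 helpers, presumably so allocations are tracked rather than guessed. For reference, the old arithmetic as a standalone snippet (standard library only; dupIP is reimplemented here because its body is outside this diff):

    package main

    import (
    	"fmt"
    	"net"
    )

    // dupIP: net.IP is a slice, so the copy prevents mutating the parsed node IP.
    func dupIP(ip net.IP) net.IP {
    	dup := make(net.IP, len(ip))
    	copy(dup, ip)
    	return dup
    }

    func main() {
    	for _, nodeIP := range []string{"172.18.0.2", "fc00:f853:ccd:e793::2"} {
    		egressIP := dupIP(net.ParseIP(nodeIP))
    		// Bump the second-to-last byte: 172.18.0.2 -> 172.18.1.2,
    		// fc00:f853:ccd:e793::2 -> fc00:f853:ccd:e793::102.
    		egressIP[len(egressIP)-2]++
    		fmt.Printf("node %s -> candidate egress IP %s\n", nodeIP, egressIP)
    	}
    }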
- egressNodeIP := net.ParseIP(egress1Node.nodeIP) - egressIP1 := dupIP(egressNodeIP) - egressIP1[len(egressIP1)-2]++ + var egressIP1 net.IP + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -3003,9 +3170,9 @@ spec: } ginkgo.By("5. Create two pods matching the EgressIP with each connected to a different network") - _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod1Name, pod1Node.name, f.Namespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) framework.ExpectNoError(err, "5. Create one pod matching the EgressIP: running on egress1Node, failed: %v", err) - _, err = createGenericPodWithLabel(f, pod2Name, pod2Node.name, otherNetworkNamespace.Name, command, podEgressLabel) + _, err = createGenericPodWithLabel(f, pod2Name, pod2Node.name, otherNetworkNamespace.Name, getAgnHostHTTPPortBindFullCMD(clusterNetworkHTTPPort), podEgressLabel) framework.ExpectNoError(err, "5. Create one pod matching the EgressIP: running on egress2Node, failed: %v", err) _, err = getPodIPWithRetry(f.ClientSet, isIPv6TestRun, f.Namespace.Name, pod1Name) framework.ExpectNoError(err, "Step 5. Create one pod matching the EgressIP: running on egress1Node, failed, err: %v", err) @@ -3015,11 +3182,11 @@ spec: framework.Logf("Created pod %s on node %s", pod2Name, pod2Node.name) ginkgo.By("6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, pod1Namespace.Name, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod1Namespace.Name, pod1Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 6. Check connectivity from pod to an external node and verify that the srcIP is the expected egressIP, failed: %v", err) ginkgo.By("7. Check connectivity from pod connected to a different network and verify that the srcIP is the expected egressIP") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, pod2OtherNetworkNamespace, true, []string{egressIP1.String()})) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(primaryTargetExternalContainer, pod2OtherNetworkNamespace, pod2Name, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 7. Check connectivity from pod connected to a different network and verify that the srcIP is the expected nodeIP, failed: %v", err) }, ginkgo.Entry("L3 Primary UDN", networkAttachmentConfigParams{ diff --git a/test/e2e/egressqos.go b/test/e2e/egressqos.go index 85143708d9..4f6b282027 100644 --- a/test/e2e/egressqos.go +++ b/test/e2e/egressqos.go @@ -97,6 +97,9 @@ var _ = ginkgo.Describe("e2e EgressQoS validation", func() { _, err := createPod(f, srcPodName, srcNode, f.Namespace.Name, []string{}, map[string]string{"app": "test"}) framework.ExpectNoError(err) } + if dst1IP == nil || *dst1IP == "" { + ginkgo.Skip("destination IP is not available from target. 
IP family may not be available") + } egressQoSConfig := fmt.Sprintf(` apiVersion: k8s.ovn.org/v1 @@ -174,6 +177,9 @@ spec: ginkgo.DescribeTable("Should validate correct DSCP value on pod labels changes", func(tcpDumpTpl string, dst1IP *string, prefix1 string, dst2IP *string, prefix2 string) { dscpValue := 50 + if *dst1IP == "" || *dst2IP == "" { + ginkgo.Skip("destination IP(s) are not available") + } // create without labels, no packets should be marked pod, err := createPod(f, srcPodName, srcNode, f.Namespace.Name, []string{}, nil) diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index 139feed95d..c7bf83d9f9 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -11,57 +11,58 @@ import ( "sync" "time" - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" "github.com/google/go-cmp/cmp" nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" - v1 "k8s.io/api/core/v1" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" - "k8s.io/klog" + "k8s.io/klog/v2" "k8s.io/kubernetes/test/e2e/framework" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" "k8s.io/kubernetes/test/e2e/framework/skipper" - utilnet "k8s.io/utils/net" ) // This is the image used for the containers acting as externalgateways, built // out from the e2e/images/Dockerfile.frr dockerfile const ( externalContainerImage = "quay.io/trozet/ovnkbfdtest:0.3" - srcHTTPPort = 80 - srcUDPPort = 90 externalGatewayPodIPsAnnotation = "k8s.ovn.org/external-gw-pod-ips" defaultPolicyName = "default-route-policy" anyLink = "any" ) -var externalContainerNetwork = "kind" -var externalContainerIPv4 = "" -var externalContainerIPv6 = "" - -func init() { +func getOverrideNetwork() (string, string, string) { // When the env variable is specified, we use a different docker network for // containers acting as external gateways. // In a specific case where the variable is set to `host` we create only one // external container to act as an external gateway, as we can't create 2 // because of overlapping ip/ports (like the bfd port). 
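Per the comment above, these gateway tests can be pointed at a non-default provider network (including the `host` network, where only a single gateway container fits) through environment variables. The new getOverrideNetwork helper whose body follows is plain os.LookupEnv; a cleaned-up, self-contained rendering of it for readability, keeping the variable names from the patch:

    package main

    import (
    	"fmt"
    	"os"
    )

    // getOverrideNetwork mirrors the helper added in this patch: empty strings
    // mean "no override" and callers fall back to the provider's primary network.
    func getOverrideNetwork() (networkName, ipv4, ipv6 string) {
    	if v, found := os.LookupEnv("OVN_TEST_EX_GW_NETWORK"); found {
    		networkName = v
    	}
    	if v, found := os.LookupEnv("OVN_TEST_EX_GW_IPV4"); found {
    		ipv4 = v
    	}
    	if v, found := os.LookupEnv("OVN_TEST_EX_GW_IPV6"); found {
    		ipv6 = v
    	}
    	return networkName, ipv4, ipv6
    }

    func main() {
    	network, v4, v6 := getOverrideNetwork()
    	fmt.Printf("network=%q ipv4=%q ipv6=%q\n", network, v4, v6)
    }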
+ var networkName, ipv4, ipv6 string if exNetwork, found := os.LookupEnv("OVN_TEST_EX_GW_NETWORK"); found { - externalContainerNetwork = exNetwork + networkName = exNetwork } - // When OVN_TEST_EX_GW_NETWORK is set to "host" we need to set the container's IP from outside if exHostIPv4, found := os.LookupEnv("OVN_TEST_EX_GW_IPV4"); found { - externalContainerIPv4 = exHostIPv4 + ipv4 = exHostIPv4 } - if exHostIPv6, found := os.LookupEnv("OVN_TEST_EX_GW_IPV6"); found { - externalContainerIPv6 = exHostIPv6 + ipv6 = exHostIPv6 } + return networkName, ipv4, ipv6 +} + +func getContainerName(template string, port uint16) string { + return fmt.Sprintf(template, port) } // gatewayTestIPs collects all the addresses required for an external gateway @@ -75,27 +76,34 @@ type gatewayTestIPs struct { var _ = ginkgo.Describe("External Gateway", func() { - // Validate pods can reach a network running in a container's looback address via + const ( + gwTCPPort = 80 + gwUDPPort = 90 + podTCPPort = 80 + podUDPPort = 90 + ) + + // Validate pods can reach a network running in a container's loopback address via // an external gateway running on eth0 of the container without any tunnel encap. // Next, the test updates the namespace annotation to point to a second container, // emulating the ext gateway. This test requires shared gateway mode in the job infra. var _ = ginkgo.Describe("e2e non-vxlan external gateway and update validation", func() { const ( - svcname string = "multiple-novxlan-externalgw" - ovnWorkerNode string = "ovn-worker" - ovnContainer string = "ovnkube-node" - gwContainerNameAlt1 string = "gw-novxlan-test-container-alt1" - gwContainerNameAlt2 string = "gw-novxlan-test-container-alt2" - ovnControlNode string = "ovn-control-plane" + svcname string = "multiple-novxlan-externalgw" + gwContainerNameTemplate string = "gw-novxlan-test-container-alt1-%d" + gwContainerNameTemplate2 string = "gw-novxlan-test-container-alt2-%d" ) var ( exGWRemoteIpAlt1 string exGWRemoteIpAlt2 string + providerCtx infraapi.Context ) + f := wrappedTestFramework(svcname) // Determine what mode the CI is running in and get relevant endpoint information for the tests ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() exGWRemoteIpAlt1 = "10.249.3.1" exGWRemoteIpAlt2 = "10.249.4.1" if IsIPv6Cluster(f.ClientSet) { @@ -104,20 +112,6 @@ var _ = ginkgo.Describe("External Gateway", func() { } }) - ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - if cid, _ := runCommand(containerRuntime, "ps", "-qaf", fmt.Sprintf("name=%s", gwContainerNameAlt1)); cid != "" { - if _, err := runCommand(containerRuntime, "rm", "-f", gwContainerNameAlt1); err != nil { - framework.Logf("failed to delete the gateway test container %s %v", gwContainerNameAlt1, err) - } - } - if cid, _ := runCommand(containerRuntime, "ps", "-qaf", fmt.Sprintf("name=%s", gwContainerNameAlt2)); cid != "" { - if _, err := runCommand(containerRuntime, "rm", "-f", gwContainerNameAlt2); err != nil { - framework.Logf("failed to delete the gateway test container %s %v", gwContainerNameAlt2, err) - } - } - }) - ginkgo.It("Should validate connectivity without vxlan before and after updating the namespace annotation to a new external gateway", func() { var pingSrc string @@ -128,32 +122,57 @@ var _ = ginkgo.Describe("External Gateway", func() { command := []string{"bash", "-c", "sleep 20000"} testContainer := fmt.Sprintf("%s-container", srcPingPodName) testContainerFlag := fmt.Sprintf("--container=%s", testContainer) - // 
non-ha ci mode runs a set of kind nodes prefixed with ovn-worker - ciWorkerNodeSrc := ovnWorkerNode - // start the container that will act as an external gateway - _, err := runCommand(containerRuntime, "run", "-itd", "--privileged", "--network", externalContainerNetwork, "--name", gwContainerNameAlt1, agnhostImage) - if err != nil { - framework.Failf("failed to start external gateway test container %s: %v", gwContainerNameAlt1, err) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + overrideNetworkStr, overrideIPv4, overrideIPv6 := getOverrideNetwork() + if overrideNetworkStr != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkStr) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork } - // retrieve the container ip of the external gateway container - alt1IPv4, alt1IPv6 := getContainerAddressesForNetwork(gwContainerNameAlt1, externalContainerNetwork) - if err != nil { - framework.Failf("failed to start external gateway test container: %v", err) + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer := infraapi.ExternalContainer{Name: getContainerName(gwContainerNameTemplate, externalContainerPort), + Image: images.AgnHost(), Network: network, ExtPort: externalContainerPort, Args: []string{"pause"}} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to start external gateway test container") + if network.Name() == "host" { + // manually cleanup because cleanup doesnt cleanup host network + providerCtx.AddCleanUpFn(func() error { + return providerCtx.DeleteExternalContainer(externalContainer) + }) } - nodeIPv4, nodeIPv6 := getContainerAddressesForNetwork(ciWorkerNodeSrc, externalContainerNetwork) - - exGWRemoteCidrAlt1 := fmt.Sprintf("%s/24", exGWRemoteIpAlt1) - exGWRemoteCidrAlt2 := fmt.Sprintf("%s/24", exGWRemoteIpAlt2) - exGWIpAlt1 := alt1IPv4 - nodeIP := nodeIPv4 + // non-ha ci mode runs a set of kind nodes prefixed with ovn-worker + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 1) + framework.ExpectNoError(err, "failed to find 3 ready and schedulable nodes") + if len(nodes.Items) < 1 { + framework.Failf("requires at least 1 Nodes") + } + node := &nodes.Items[0] + ni, err := infraprovider.Get().GetK8NodeNetworkInterface(node.Name, network) + framework.ExpectNoError(err, "must get network interface info") + var nodeAddr string + var exGWIpAlt1, exGWRemoteCidrAlt1, exGWRemoteCidrAlt2 string if isIPv6Cluster { - exGWIpAlt1 = alt1IPv6 + exGWIpAlt1 = externalContainer.GetIPv6() + if overrideIPv6 != "" { + exGWIpAlt1 = overrideIPv6 + } exGWRemoteCidrAlt1 = fmt.Sprintf("%s/64", exGWRemoteIpAlt1) exGWRemoteCidrAlt2 = fmt.Sprintf("%s/64", exGWRemoteIpAlt2) - nodeIP = nodeIPv6 + nodeAddr = ni.IPv6 + } else { + exGWIpAlt1 = externalContainer.GetIPv4() + if overrideIPv4 != "" { + exGWIpAlt1 = overrideIPv4 + } + exGWRemoteCidrAlt1 = fmt.Sprintf("%s/24", exGWRemoteIpAlt1) + exGWRemoteCidrAlt2 = fmt.Sprintf("%s/24", exGWRemoteIpAlt2) + nodeAddr = ni.IPv4 + } + if nodeAddr == "" { + framework.Failf("failed to find node internal IP for node %s", node.Name) } - // annotate the test namespace annotateArgs := []string{ "annotate", @@ -164,18 +183,17 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.Logf("Annotating the external gateway test namespace to a container gw: %s ", exGWIpAlt1) 
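Several hunks in this file trade the old ginkgo.AfterEach container teardown for providerCtx.AddCleanUpFn callbacks registered right where each resource is created (host-network containers here, routes and loopback addresses further down). A generic sketch of that register-cleanup-as-you-go idea, independent of the test framework; the cleanupStack type is illustrative and not the infraprovider API:

    package main

    import "fmt"

    // cleanupStack is an illustrative stand-in for the provider test context:
    // undo functions are registered as resources are created, and this sketch
    // runs them in reverse order so later resources unwind first.
    type cleanupStack struct {
    	fns []func() error
    }

    func (c *cleanupStack) AddCleanUpFn(fn func() error) { c.fns = append(c.fns, fn) }

    func (c *cleanupStack) CleanUp() {
    	for i := len(c.fns) - 1; i >= 0; i-- {
    		if err := c.fns[i](); err != nil {
    			fmt.Println("cleanup step failed:", err)
    		}
    	}
    }

    func main() {
    	ctx := &cleanupStack{}
    	fmt.Println("create external container on the host network")
    	ctx.AddCleanUpFn(func() error { fmt.Println("delete external container"); return nil })
    	fmt.Println("add host route back to the pod")
    	ctx.AddCleanUpFn(func() error { fmt.Println("delete host route"); return nil })
    	ctx.CleanUp() // route removed first, then the container
    }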
e2ekubectl.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) - podCIDR, _, err := getNodePodCIDRs(ciWorkerNodeSrc) + podCIDR, _, err := getNodePodCIDRs(node.Name) if err != nil { - framework.Failf("Error retrieving the pod cidr from %s %v", ciWorkerNodeSrc, err) + framework.Failf("Error retrieving the pod cidr from %s %v", node.Name, err) } - framework.Logf("the pod cidr for node %s is %s", ciWorkerNodeSrc, podCIDR) + framework.Logf("the pod cidr for node %s is %s", node.Name, podCIDR) // add loopback interface used to validate all traffic is getting drained through the gateway - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt1, "ip", "address", "add", exGWRemoteCidrAlt1, "dev", "lo") - if err != nil { - framework.Failf("failed to add the loopback ip to dev lo on the test container: %v", err) - } + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "address", "add", exGWRemoteCidrAlt1, "dev", "lo"}) + framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the test container") + // Create the pod that will be used as the source for the connectivity test - _, err = createGenericPod(f, srcPingPodName, ciWorkerNodeSrc, f.Namespace.Name, command) + _, err = createGenericPod(f, srcPingPodName, node.Name, f.Namespace.Name, command) framework.ExpectNoError(err, "failed to create pod %s/%s", f.Namespace.Name, srcPingPodName) // wait for pod setup to return a valid address err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { @@ -187,36 +205,46 @@ var _ = ginkgo.Describe("External Gateway", func() { return true, nil }) // Fail the test if no address is ever retrieved - if err != nil { - framework.Failf("Error trying to get the pod IP address") - } + framework.ExpectNoError(err, "Error trying to get the pod IP address") // add a host route on the first mock gateway for return traffic to the pod - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt1, "ip", "route", "add", pingSrc, "via", nodeIP) - if err != nil { - framework.Failf("failed to add the pod host route on the test container: %v", err) - } - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt1, "ping", "-c", "5", pingSrc) - framework.ExpectNoError(err, "Failed to ping %s from container %s", pingSrc, gwContainerNameAlt1) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "add", pingSrc, "via", nodeAddr}) + framework.ExpectNoError(err, "failed to add the pod host route on the test container") + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "del", pingSrc, "via", nodeAddr}) + if err != nil { + return fmt.Errorf("failed to add the pod host route on the test container: %v", err) + } + return nil + }) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ping", "-c", "5", pingSrc}) + framework.ExpectNoError(err, "Failed to ping %s from container %s", pingSrc, getContainerName(gwContainerNameTemplate, externalContainerPort)) time.Sleep(time.Second * 15) // Verify the gateway and remote address is reachable from the initial pod ginkgo.By(fmt.Sprintf("Verifying connectivity without vxlan to the updated annotation and initial external gateway %s and remote address %s", exGWIpAlt1, exGWRemoteIpAlt1)) _, err = e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, testContainerFlag, "--", "ping", "-w", "40", exGWRemoteIpAlt1) - if err != 
nil { - framework.Failf("Failed to ping the first gateway network %s from container %s on node %s: %v", exGWRemoteIpAlt1, ovnContainer, ovnWorkerNode, err) - } + framework.ExpectNoError(err, "Failed to ping the first gateway network %s from container %s on node %s: %v", exGWRemoteIpAlt1, testContainer, node.Name, err) // start the container that will act as a new external gateway that the tests will be updated to use - _, err = runCommand(containerRuntime, "run", "-itd", "--privileged", "--network", externalContainerNetwork, "--name", gwContainerNameAlt2, agnhostImage) - if err != nil { - framework.Failf("failed to start external gateway test container %s: %v", gwContainerNameAlt2, err) + externalContainer2Port := infraprovider.Get().GetExternalContainerPort() + externalContainer2 := infraapi.ExternalContainer{Name: getContainerName(gwContainerNameTemplate2, externalContainerPort), + Image: images.AgnHost(), Network: network, ExtPort: externalContainer2Port, Args: []string{"pause"}} + externalContainer2, err = providerCtx.CreateExternalContainer(externalContainer2) + framework.ExpectNoError(err, "failed to start external gateway test container %s", getContainerName(gwContainerNameTemplate2, externalContainerPort)) + if network.Name() == "host" { + // manually cleanup because cleanup doesnt cleanup host network + providerCtx.AddCleanUpFn(func() error { + return providerCtx.DeleteExternalContainer(externalContainer2) + }) } - // retrieve the container ip of the external gateway container - alt2IPv4, alt2IPv6 := getContainerAddressesForNetwork(gwContainerNameAlt2, externalContainerNetwork) - exGWIpAlt2 := alt2IPv4 + var exGWIpAlt2 string if isIPv6Cluster { - exGWIpAlt2 = alt2IPv6 + exGWIpAlt2 = externalContainer2.GetIPv6() + } else { + exGWIpAlt2 = externalContainer2.GetIPv4() + } + if exGWIpAlt2 == "" { + framework.Failf("failed to retrieve container %s IP address", getContainerName(gwContainerNameTemplate2, externalContainerPort)) } - // override the annotation in the test namespace with the new gateway annotateArgs = []string{ "annotate", @@ -228,25 +256,32 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.Logf("Annotating the external gateway test namespace to a new container remote IP:%s gw:%s ", exGWIpAlt2, exGWRemoteIpAlt2) e2ekubectl.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) 
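Each mock gateway gets the same three-step preparation, shown above for the first container and repeated below for the second: place the emulated remote prefix on its loopback device, add a host route back to the pod via the node's address on the external network, then ping the pod to confirm the return path. Collected here for readability, with placeholder pod and node addresses (cleanup swaps "add" for "del"):

    package main

    import "fmt"

    func main() {
    	const (
    		exGWRemoteCidr = "10.249.4.1/24" // emulated remote prefix; a /64 on IPv6 clusters
    		podIP          = "10.244.1.5"    // placeholder: pingSrc in the test
    		nodeAddr       = "172.18.0.3"    // placeholder: node address on the external network
    	)
    	// Same argv slices the hunks pass to ExecExternalContainerCommand on the
    	// gateway container.
    	for _, cmd := range [][]string{
    		{"ip", "address", "add", exGWRemoteCidr, "dev", "lo"},
    		{"ip", "route", "add", podIP, "via", nodeAddr},
    		{"ping", "-c", "5", podIP},
    	} {
    		fmt.Println(cmd)
    	}
    }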
// add loopback interface used to validate all traffic is getting drained through the gateway - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt2, "ip", "address", "add", exGWRemoteCidrAlt2, "dev", "lo") - if err != nil { - framework.Failf("failed to add the loopback ip to dev lo on the test container: %v", err) - } + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{"ip", "address", "add", exGWRemoteCidrAlt2, "dev", "lo"}) + framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the test container %s", getContainerName(gwContainerNameTemplate2, externalContainerPort)) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{"ip", "address", "del", exGWRemoteCidrAlt2, "dev", "lo"}) + if err != nil { + return fmt.Errorf("failed to cleanup loopback ip on test container %s: %v", getContainerName(gwContainerNameTemplate2, externalContainerPort), err) + } + return nil + }) // add a host route on the second mock gateway for return traffic to the pod - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt2, "ip", "route", "add", pingSrc, "via", nodeIP) - if err != nil { - framework.Failf("failed to add the pod route on the test container: %v", err) - } - - _, err = runCommand(containerRuntime, "exec", gwContainerNameAlt2, "ping", "-c", "5", pingSrc) - framework.ExpectNoError(err, "Failed to ping %s from container %s", pingSrc, gwContainerNameAlt1) - + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{"ip", "route", "add", pingSrc, "via", nodeAddr}) + framework.ExpectNoError(err, "failed to add the pod route on the test container %s", getContainerName(gwContainerNameTemplate2, externalContainerPort)) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{"ip", "route", "del", pingSrc, "via", nodeAddr}) + if err != nil { + return fmt.Errorf("failed to cleanup route on test container %s: %v", getContainerName(gwContainerNameTemplate2, externalContainerPort), err) + } + return nil + }) + // ping pod from external container + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer2, []string{"ping", "-c", "5", pingSrc}) + framework.ExpectNoError(err, "Failed to ping %s from container %s", pingSrc, getContainerName(gwContainerNameTemplate2, externalContainerPort)) // Verify the updated gateway and remote address is reachable from the initial pod ginkgo.By(fmt.Sprintf("Verifying connectivity without vxlan to the updated annotation and new external gateway %s and remote IP %s", exGWRemoteIpAlt2, exGWIpAlt2)) _, err = e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, testContainerFlag, "--", "ping", "-w", "40", exGWRemoteIpAlt2) - if err != nil { - framework.Failf("Failed to ping the second gateway network %s from container %s on node %s: %v", exGWRemoteIpAlt2, ovnContainer, ovnWorkerNode, err) - } + framework.ExpectNoError(err, "Failed to ping the second gateway network %s from container %s on node %s: %v", exGWRemoteIpAlt2, testContainer, node.Name) }) }) @@ -257,8 +292,8 @@ var _ = ginkgo.Describe("External Gateway", func() { // sourcing from the mock gateway container loopback to the test ns pod. 
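Editor's note: the hunks above repeatedly pair a setup command with a providerCtx.AddCleanUpFn registration that undoes it (ip route add / ip route del, ip address add / ip address del). The self-contained sketch below models that pattern with a hypothetical cleanupStack type and a logging run stub; it is not the repository's infraapi.Context or ExecExternalContainerCommand, just an illustration of registering the inverse action next to the action itself and running the cleanups LIFO.

```go
// Sketch (not the repository's infraapi.Context): register the inverse of each
// setup step right where the step happens, then run the cleanups in reverse.
package main

import "fmt"

type cleanupStack struct {
	fns []func() error
}

// AddCleanUpFn mirrors the signature used in the diff: a func() error queued for teardown.
func (c *cleanupStack) AddCleanUpFn(fn func() error) {
	c.fns = append(c.fns, fn)
}

// Run executes the registered cleanups in LIFO order so teardown undoes setup
// steps in the opposite order they were applied.
func (c *cleanupStack) Run() {
	for i := len(c.fns) - 1; i >= 0; i-- {
		if err := c.fns[i](); err != nil {
			fmt.Println("cleanup failed:", err)
		}
	}
}

// run stands in for ExecExternalContainerCommand; here it only logs the command.
func run(args ...string) (string, error) {
	fmt.Println("exec:", args)
	return "", nil
}

func main() {
	ctx := &cleanupStack{}

	// Setup: add a host route on the mock gateway, then immediately register
	// the matching "ip route del" so a failed test still cleans up after itself.
	if _, err := run("ip", "route", "add", "10.244.0.5", "via", "172.18.0.2"); err == nil {
		ctx.AddCleanUpFn(func() error {
			_, err := run("ip", "route", "del", "10.244.0.5", "via", "172.18.0.2")
			return err
		})
	}

	// ... test body would go here ...

	ctx.Run() // teardown in reverse order
}
```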
var _ = ginkgo.Describe("e2e ingress gateway traffic validation", func() { const ( - svcname string = "novxlan-externalgw-ingress" - gwContainer string = "gw-ingress-test-container" + svcname string = "novxlan-externalgw-ingress" + gwContainerTemplate string = "gw-ingress-test-container-%d" ) f := wrappedTestFramework(svcname) @@ -270,11 +305,12 @@ var _ = ginkgo.Describe("External Gateway", func() { var ( workerNodeInfo nodeInfo - IsIPv6 bool + isIPv6 bool + providerCtx infraapi.Context ) ginkgo.BeforeEach(func() { - + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -283,21 +319,12 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - ips := e2enode.CollectAddresses(nodes, v1.NodeInternalIP) + ips := e2enode.CollectAddresses(nodes, corev1.NodeInternalIP) workerNodeInfo = nodeInfo{ name: nodes.Items[1].Name, nodeIP: ips[1], } - IsIPv6 = IsIPv6Cluster(f.ClientSet) - }) - - ginkgo.AfterEach(func() { - // tear down the container simulating the gateway - if cid, _ := runCommand(containerRuntime, "ps", "-qaf", fmt.Sprintf("name=%s", gwContainer)); cid != "" { - if _, err := runCommand(containerRuntime, "rm", "-f", gwContainer); err != nil { - framework.Logf("failed to delete the gateway test container %s %v", gwContainer, err) - } - } + isIPv6 = IsIPv6Cluster(f.ClientSet) }) ginkgo.It("Should validate ingress connectivity from an external gateway", func() { @@ -311,21 +338,41 @@ var _ = ginkgo.Describe("External Gateway", func() { pingCmd = ipv4PingCommand pingCount = "3" ) - if IsIPv6 { + if isIPv6 { exGWLo = "fc00::1" // unique local ipv6 unicast addr as per rfc4193 exGWLoCidr = fmt.Sprintf("%s/64", exGWLo) pingCmd = ipv6PingCommand } - // start the first container that will act as an external gateway - _, err := runCommand(containerRuntime, "run", "-itd", "--privileged", "--network", externalContainerNetwork, - "--name", gwContainer, agnhostImage) - if err != nil { - framework.Failf("failed to start external gateway test container %s: %v", gwContainer, err) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + overrideNetworkStr, overrideIPv4, overrideIPv6 := getOverrideNetwork() + if overrideNetworkStr != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkStr) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer := infraapi.ExternalContainer{Name: getContainerName(gwContainerTemplate, externalContainerPort), Image: images.AgnHost(), Network: network, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to start external gateway test container %s", getContainerName(gwContainerTemplate, externalContainerPort)) + if network.Name() == "host" { + // manually cleanup because cleanup doesnt cleanup host network + providerCtx.AddCleanUpFn(func() error { + return providerCtx.DeleteExternalContainer(externalContainer) + }) + } + + exGWIp := externalContainer.GetIPv4() + if overrideIPv4 != "" { + exGWIp = overrideIPv4 } - exGWIp, exGWIpv6 := 
getContainerAddressesForNetwork(gwContainer, externalContainerNetwork) - if IsIPv6 { - exGWIp = exGWIpv6 + if isIPv6 { + exGWIp = externalContainer.GetIPv6() + if overrideIPv6 != "" { + exGWIp = overrideIPv6 + } } // annotate the test namespace with the external gateway address annotateArgs := []string{ @@ -336,10 +383,11 @@ var _ = ginkgo.Describe("External Gateway", func() { } framework.Logf("Annotating the external gateway test namespace to container gateway: %s", exGWIp) e2ekubectl.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) - - nodeIP, nodeIPv6 := getContainerAddressesForNetwork(workerNodeInfo.name, externalContainerNetwork) - if IsIPv6 { - nodeIP = nodeIPv6 + primaryNetworkInf, err := infraprovider.Get().GetK8NodeNetworkInterface(workerNodeInfo.name, network) + framework.ExpectNoError(err, "failed to get network interface info for network (%s) on node %s", network, workerNodeInfo.name) + nodeIP := primaryNetworkInf.IPv4 + if isIPv6 { + nodeIP = primaryNetworkInf.IPv6 } framework.Logf("the pod side node is %s and the source node ip is %s", workerNodeInfo.name, nodeIP) podCIDR, _, err := getNodePodCIDRs(workerNodeInfo.name) @@ -364,24 +412,32 @@ var _ = ginkgo.Describe("External Gateway", func() { if err != nil { framework.Failf("Error trying to get the pod IP address") } - // add a host route on the gateway for return traffic to the pod - _, err = runCommand(containerRuntime, "exec", gwContainer, "ip", "route", "add", pingDstPod, "via", nodeIP) - if err != nil { - framework.Failf("failed to add the pod host route on the test container %s: %v", gwContainer, err) - } + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "add", pingDstPod, "via", nodeIP}) + framework.ExpectNoError(err, "failed to add the pod host route on the test container %s", gwContainerTemplate) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "del", pingDstPod, "via", nodeIP}) + if err != nil { + return fmt.Errorf("failed to cleanup route in external container %s: %v", gwContainerTemplate, err) + } + return nil + }) // add a loopback address to the mock container that will source the ingress test - _, err = runCommand(containerRuntime, "exec", gwContainer, "ip", "address", "add", exGWLoCidr, "dev", "lo") - if err != nil { - framework.Failf("failed to add the loopback ip to dev lo on the test container: %v", err) - } - + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "address", "add", exGWLoCidr, "dev", "lo"}) + framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the test container %s", gwContainerTemplate) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "address", "del", exGWLoCidr, "dev", "lo"}) + if err != nil { + return fmt.Errorf("failed to cleanup loopback ip on dev lo within test container %s: %v", gwContainerTemplate, err) + } + return nil + }) // Validate connectivity from the external gateway loopback to the pod in the test namespace - ginkgo.By(fmt.Sprintf("Validate ingress traffic from the external gateway %s can reach the pod in the exgw annotated namespace", gwContainer)) + ginkgo.By(fmt.Sprintf("Validate ingress traffic from the external gateway %s can reach the pod in the exgw annotated namespace", + fmt.Sprintf(gwContainerTemplate, externalContainer.GetPort()))) // generate traffic that will verify 
connectivity from the mock external gateway loopback - _, err = runCommand(containerRuntime, "exec", gwContainer, string(pingCmd), "-c", pingCount, "-I", "eth0", pingDstPod) - if err != nil { - framework.Failf("failed to ping the pod address %s from mock container %s: %v", pingDstPod, gwContainer, err) - } + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{string(pingCmd), "-c", pingCount, + "-I", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), pingDstPod}) + framework.ExpectNoError(err, "failed to ping the pod address %s from mock container %s", pingDstPod, gwContainerTemplate) }) }) @@ -392,33 +448,28 @@ var _ = ginkgo.Describe("External Gateway", func() { // The traffic will get proxied through an annotated pod in the serving namespace. var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" + svcname string = "externalgw-pod-novxlan" + gwContainer1Template string = "ex-gw-container1-%d" + gwContainer2Template string = "ex-gw-container2-%d" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ecmpRetry int = 20 + testTimeout time.Duration = 20 * time.Second ) var ( sleepCommand = []string{"bash", "-c", "sleep 20000"} addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface servingNamespace string - ) - - var ( - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -431,13 +482,20 @@ var _ = ginkgo.Describe("External Gateway", func() { ns, err := f.CreateNamespace(context.TODO(), "exgw-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) - setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + overrideNetworkStr, _, _ := getOverrideNetwork() + if overrideNetworkStr != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkStr) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, + srcPingPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) + setupAnnotatedGatewayPods(f, nodes, network, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) }) ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, 
[]string{gwContainer1, gwContainer2}, addressesv4, addressesv6) resetGatewayAnnotations(f) }) @@ -450,15 +508,17 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) } pingSync := sync.WaitGroup{} @@ -470,8 +530,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). 
+ ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(t) } pingSync.Wait() @@ -481,18 +543,18 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.Entry("ipv6", &addressesv6, "icmp6")) ginkgo.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + func(protocol string, addresses *gatewayTestIPs, gwPort, podPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPingPodName, protocol) } expectedHostNames := make(map[string]struct{}) for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") + res, err := infraprovider.Get().ExecExternalContainerCommand(c, []string{"hostname"}) framework.ExpectNoError(err, "failed to run hostname in %s", c) hostname := strings.TrimSuffix(res, "\n") framework.Logf("Hostname for %s is %s", c, hostname) @@ -502,17 +564,17 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Checking that external ips are reachable with both gateways") returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] success := false for i := 0; i < 20; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", gwIP, gwPort)) } res, err := e2ekubectl.RunKubectl(f.Namespace.Name, args...) - framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + framework.ExpectNoError(err, "failed to reach %s (%s)", gwIP, protocol) hostname := strings.TrimSuffix(res, "\n") if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -530,10 +592,10 @@ var _ = ginkgo.Describe("External Gateway", func() { } }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgo.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort, podTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort, podTCPPort)) }) // Validate pods can reach a network running in multiple container's loopback @@ -547,22 +609,22 @@ var _ = ginkgo.Describe("External Gateway", func() { // The test checks that both hostnames are collected at least once. 
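Editor's note: this diff consistently replaces one long-running ping (ping -c testTimeout) with gomega.Eventually polling a short ping (-c1 -W1) until it succeeds within testTimeout. A minimal sketch of that retry shape is below; flakyPing is a hypothetical stand-in for e2ekubectl.RunKubectl / ExecExternalContainerCommand, and the durations are illustrative.

```go
// Sketch of the Eventually-based retry used throughout this diff: poll a short
// command until it returns non-empty output and a nil error, instead of relying
// on one long-running ping.
package example

import (
	"fmt"
	"testing"
	"time"

	"github.com/onsi/gomega"
)

func TestEventuallyRetriesUntilOutput(t *testing.T) {
	g := gomega.NewWithT(t)

	attempts := 0
	// Takes arguments (like the real helpers) and returns (string, error):
	// Gomega keeps polling while the error is non-nil or the matcher fails.
	flakyPing := func(target string) (string, error) {
		attempts++
		if attempts < 3 {
			return "", fmt.Errorf("ping %s: transient failure", target)
		}
		return fmt.Sprintf("64 bytes from %s", target), nil
	}

	g.Eventually(flakyPing).
		WithArguments("172.18.0.10").
		WithTimeout(10 * time.Second).
		WithPolling(200 * time.Millisecond).
		ShouldNot(gomega.BeEmpty(), "expected the ping to eventually succeed")
}
```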
var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + testTimeout time.Duration = 300 * time.Second + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" ) f := wrappedTestFramework(svcname) - var gwContainers []string + var gwContainers []infraapi.ExternalContainer + var providerCtx infraapi.Context var addressesv4, addressesv6 gatewayTestIPs ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -571,19 +633,20 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } else if overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork } - - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) - + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, + srcPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) }) ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) resetGatewayAnnotations(f) }) @@ -597,14 +660,18 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Verifying connectivity to the pod from external gateways") for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). 
+ ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-s", "1420", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } // Verify the gateways and remote loopback addresses are reachable from the pod. @@ -621,7 +688,7 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) } pingSync := sync.WaitGroup{} @@ -633,10 +700,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } pingSync.Wait() @@ -647,7 +714,7 @@ var _ = ginkgo.Describe("External Gateway", func() { // This test runs a listener on the external container, returning the host name both on tcp and udp. // The src pod tries to hit the remote address until both the containers are hit. - ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { + ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, gwPort, podPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } @@ -655,19 +722,19 @@ var _ = ginkgo.Describe("External Gateway", func() { annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
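Editor's note: the DescribeTable body that continues below keeps poking a single shared target IP and records which gateway answered (agnhost replies with its hostname over nc), passing once every expected gateway has been seen at least once. The sketch below models that collect-until-the-set-matches loop in isolation; poke is a hypothetical stand-in for pokeHostnameViaNC and the hostnames are invented.

```go
// Sketch of the ECMP coverage loop used below: keep poking one shared target IP
// and collect the hostnames that answer until every expected gateway has replied
// at least once. poke stands in for pokeHostnameViaNC, which runs
// `echo | nc -w 1 [-u] <ip> <port>` inside the source pod.
package main

import (
	"fmt"
	"math/rand"
)

func main() {
	expected := map[string]struct{}{
		"gw-test-container1-30000": {},
		"gw-test-container2-30001": {},
	}

	// Simulated responder: ECMP may hash each probe to either gateway.
	gateways := []string{"gw-test-container1-30000", "gw-test-container2-30001"}
	poke := func() string { return gateways[rand.Intn(len(gateways))] }

	returned := map[string]struct{}{}
	success := false
	for i := 0; i < 20; i++ {
		if hostname := poke(); hostname != "" {
			returned[hostname] = struct{}{}
		}
		if len(returned) == len(expected) {
			success = true
			break
		}
	}
	fmt.Printf("hit all gateways: %v (responders: %v)\n", success, returned)
}
```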
for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPodName, protocol) } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) success := false // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -683,53 +750,59 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) } - }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), - ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), - ginkgo.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), - ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) + }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", gwUDPPort, podUDPPort), + ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", gwTCPPort, podTCPPort), + ginkgo.Entry("IPV6 udp", &addressesv6, "udp", gwUDPPort, podUDPPort), + ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", gwTCPPort, podTCPPort)) }) var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - srcPodName string = "e2e-exgw-src-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" ) f := wrappedTestFramework(svcname) var ( addressesv4, addressesv6 gatewayTestIPs + externalContainers []infraapi.ExternalContainer + providerCtx infraapi.Context sleepCommand []string - nodes *v1.NodeList + nodes *corev1.NodeList err error - clientSet kubernetes.Interface servingNamespace string ) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names - nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), clientSet, 3) + nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { framework.Failf( "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + 
framework.ExpectNoError(err, "override network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } - ns, err := f.CreateNamespace(context.TODO(), "exgw-conntrack-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + externalContainers, addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, srcPodName) sleepCommand = []string{"bash", "-c", "trap : TERM INT; sleep infinity & wait"} _, err = createGenericPod(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand) framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) @@ -738,10 +811,6 @@ var _ = ginkgo.Describe("External Gateway", func() { }) ginkgo.AfterEach(func() { - // tear down the containers and pods simulating the gateways - ginkgo.By("Deleting the gateway containers") - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) resetGatewayAnnotations(f) }) @@ -751,22 +820,25 @@ var _ = ginkgo.Describe("External Gateway", func() { } ginkgo.By("Annotate the app namespace to get managed by external gateways") annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + + for i, externalContainer := range externalContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"iperf3", "-u", "-c", addresses.srcPodIP, + "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"}) + framework.ExpectNoError(err, "failed to execute iperf command from external container") + networkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(externalContainer, network) + framework.ExpectNoError(err, "failed to get %s network information for external container %s", network.Name(), externalContainer.Name) // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. 
- macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInfo.MAC, ":", "", -1), "0") } ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") @@ -825,21 +897,24 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) } - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + for i, container := range externalContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + cmd := []string{"iperf3", "-u", "-c", addresses.srcPodIP, "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err = infraprovider.Get().ExecExternalContainerCommand(container, cmd) + framework.ExpectNoError(err, "failed to start iperf client from external container") + networkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(container, network) + framework.ExpectNoError(err, "failed to get external container network information") // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. 
- macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInfo.MAC, ":", "", -1), "0") } ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") @@ -912,34 +987,28 @@ var _ = ginkgo.Describe("External Gateway", func() { var _ = ginkgo.Context("BFD", func() { var _ = ginkgo.Describe("e2e non-vxlan external gateway through an annotated gateway pod", func() { const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - defaultPolicyName = "default-route-policy" + svcname string = "externalgw-pod-novxlan" + gwContainer1Template string = "ex-gw-container1-%d" + gwContainer2Template string = "ex-gw-container2-%d" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ecmpRetry int = 20 + testTimeout time.Duration = 20 * time.Second ) var ( sleepCommand = []string{"bash", "-c", "sleep 20000"} addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface servingNamespace string - ) - - var ( - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -952,14 +1021,19 @@ var _ = ginkgo.Describe("External Gateway", func() { ns, err := f.CreateNamespace(context.TODO(), "exgw-bfd-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, + gwContainer2Template, srcPingPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, true) + setupAnnotatedGatewayPods(f, nodes, network, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) }) ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) resetGatewayAnnotations(f) }) @@ -972,21 +1046,24 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Verifying connectivity to the pod from external gateways") for _, gwContainer := range gwContainers { // Ping from a 
common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) - for _, gwContainer := range gwContainers { - gomega.Expect(isBFDPaired(gwContainer, addresses.nodeIP)).To(gomega.Equal(true), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). + Should(gomega.BeTrue(), "Bfd not paired") } tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) } // Verify the external gateway loopback address running on the external container is reachable and @@ -1001,10 +1078,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(address) } @@ -1013,12 +1090,13 @@ var _ = ginkgo.Describe("External Gateway", func() { if len(gwContainers) > 1 { ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) time.Sleep(3 * time.Second) // bfd timeout tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gwContainers[0], srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainers[0], srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) // Verify the external gateway loopback address running on the external container is reachable and // that traffic from the source ping pod is proxied through the pod in the serving namespace @@ -1030,8 +1108,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + gomega.Eventually(e2ekubectl.RunKubectl). 
+ WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(t) } pingSync.Wait() @@ -1042,28 +1122,34 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.Entry("ipv6", &addressesv6, "icmp6")) ginkgo.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort int) { + func(protocol string, addresses *gatewayTestIPs, gwPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } for _, gwContainer := range gwContainers { - gomega.Expect(isBFDPaired(gwContainer, addresses.nodeIP)).To(gomega.Equal(true), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). + Should(gomega.BeTrue(), "Bfd not paired") } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] success := false for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -1081,24 +1167,25 @@ var _ = ginkgo.Describe("External Gateway", func() { if len(gwContainers) > 1 { ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") time.Sleep(3 * time.Second) // bfd timeout // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + expectedHostName := hostNameForExternalContainer(gwContainers[0]) ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) gomega.Expect(expectedHostName).To(gomega.Equal(hostname), "Hostname returned by nc not as expected") } } }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), - ginkgo.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + 
ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort)) }) // Validate pods can reach a network running in multiple container's loopback @@ -1112,28 +1199,26 @@ var _ = ginkgo.Describe("External Gateway", func() { // The test checks that both hostnames are collected at least once. var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + testTimeout time.Duration = 30 * time.Second + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" ) var ( - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context + testContainer = fmt.Sprintf("%s-container", srcPodName) + testContainerFlag = fmt.Sprintf("--container=%s", testContainer) + addressesv4, addressesv6 gatewayTestIPs ) - testContainer := fmt.Sprintf("%s-container", srcPodName) - testContainerFlag := fmt.Sprintf("--container=%s", testContainer) - f := wrappedTestFramework(svcname) - var addressesv4, addressesv6 gatewayTestIPs - ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { @@ -1141,20 +1226,22 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, + gwContainer1Template, gwContainer2Template, srcPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, true) }) ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) resetGatewayAnnotations(f) }) @@ -1166,15 +1253,18 @@ var _ = ginkgo.Describe("External Gateway", func() { annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. 
- _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) - for _, gwContainer := range gwContainers { - gomega.Expect(isBFDPaired(gwContainer, addresses.nodeIP)).To(gomega.Equal(true), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). + Should(gomega.BeTrue(), "Bfd not paired") } // Verify the gateways and remote loopback addresses are reachable from the pod. @@ -1191,7 +1281,7 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) } // spawn a goroutine to asynchronously (to speed up the test) @@ -1206,10 +1296,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } @@ -1217,14 +1307,15 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync.Wait() ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) time.Sleep(3 * time.Second) // bfd timeout pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gwContainers[0], srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainers[0], srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) // spawn a goroutine to asynchronously (to speed up the test) // to ping the gateway loopbacks on both containers via ECMP. @@ -1233,10 +1324,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). 
+ WithArguments(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } @@ -1248,7 +1339,7 @@ var _ = ginkgo.Describe("External Gateway", func() { // This test runs a listener on the external container, returning the host name both on tcp and udp. // The src pod tries to hit the remote address until both the containers are hit. - ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, gwPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } @@ -1256,27 +1347,30 @@ var _ = ginkgo.Describe("External Gateway", func() { annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) - for _, gwContainer := range gwContainers { - gomega.Expect(isBFDPaired(gwContainer, addresses.nodeIP)).To(gomega.Equal(true), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). 
+ Should(gomega.BeTrue(), "Bfd not paired") } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) success := false // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -1293,22 +1387,23 @@ var _ = ginkgo.Describe("External Gateway", func() { } ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") time.Sleep(3 * time.Second) // bfd timeout // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + expectedHostName := hostNameForExternalContainer(gwContainers[0]) ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) gomega.Expect(expectedHostName).To(gomega.Equal(hostname), "Hostname returned by nc not as expected") } - }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), - ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), - ginkgo.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), - ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", gwUDPPort), + ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", gwTCPPort), + ginkgo.Entry("IPV6 udp", &addressesv6, "udp", gwUDPPort), + ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", gwTCPPort)) }) }) @@ -1321,30 +1416,28 @@ var _ = ginkgo.Describe("External Gateway", func() { // The traffic will get proxied through an annotated pod in the serving namespace. 
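Editor's note: the BFD variants above drop the fixed 3-second "sync up" sleep before asserting and instead poll isBFDPaired with Eventually/WithPolling until the session reports up. Below is a standalone sketch of that wait-for-a-predicate shape; isPaired is a hypothetical stand-in for the suite's isBFDPaired helper and simply flips to true after about a second.

```go
// Sketch of the BFD-readiness wait used above: instead of sleeping a fixed
// 3 seconds for BFD to sync, poll a predicate until the session is reported
// as paired.
package bfdexample

import (
	"testing"
	"time"

	"github.com/onsi/gomega"
)

func TestWaitForBFDPairing(t *testing.T) {
	g := gomega.NewWithT(t)

	start := time.Now()
	// Pretend the BFD session comes up roughly one second after the test starts.
	isPaired := func(peer string) (bool, error) {
		return time.Since(start) > time.Second, nil
	}

	g.Eventually(isPaired).
		WithArguments("172.18.0.3").
		WithTimeout(time.Minute).
		WithPolling(5 * time.Second).
		Should(gomega.BeTrue(), "BFD session with %s never paired", "172.18.0.3")
}
```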
var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" + svcname string = "externalgw-pod-novxlan" + gwContainer1Template string = "ex-gw-container1-%d" + gwContainer2Template string = "ex-gw-container2-%d" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ecmpRetry int = 20 + testTimeout time.Duration = 20 * time.Second ) var ( sleepCommand = []string{"bash", "-c", "sleep 20000"} addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface servingNamespace string - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -1357,14 +1450,20 @@ var _ = ginkgo.Describe("External Gateway", func() { ns, err := f.CreateNamespace(context.TODO(), "exgw-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) - setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, + gwContainer2Template, srcPingPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) + setupPolicyBasedGatewayPods(f, nodes, network, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) }) ginkgo.AfterEach(func() { deleteAPBExternalRouteCR(defaultPolicyName) - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) }) ginkgo.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a gateway pod", @@ -1377,15 +1476,17 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. 
- _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) } pingSync := sync.WaitGroup{} @@ -1397,8 +1498,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(t) } pingSync.Wait() @@ -1409,39 +1512,39 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.Entry("ipv6", &addressesv6, "icmp6")) ginkgo.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a gateway pod", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + func(protocol string, addresses *gatewayTestIPs, gwPort, podPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPingPodName, protocol) } expectedHostNames := make(map[string]struct{}) for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") - framework.ExpectNoError(err, "failed to run hostname in %s", c) + res, err := infraprovider.Get().ExecExternalContainerCommand(c, []string{"hostname"}) + framework.ExpectNoError(err, "failed to run hostname in %s", c.Name) hostname := strings.TrimSuffix(res, "\n") - framework.Logf("Hostname for %s is %s", c, hostname) + framework.Logf("Hostname for %s is %s", c.Name, hostname) expectedHostNames[hostname] = struct{}{} } framework.Logf("Expected hostnames are %v", expectedHostNames) ginkgo.By("Checking that external ips are reachable with both gateways") returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] success := false for i := 0; i < 20; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 
1 %s %d", target, destPort)) + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", gwIP, gwPort)) } res, err := e2ekubectl.RunKubectl(f.Namespace.Name, args...) - framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + framework.ExpectNoError(err, "failed to reach %s (%s)", gwIP, protocol) hostname := strings.TrimSuffix(res, "\n") if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -1459,18 +1562,18 @@ var _ = ginkgo.Describe("External Gateway", func() { } checkAPBExternalRouteStatus(defaultPolicyName) }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgo.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort, podTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort, podTCPPort)) ginkgo.DescribeTable("Should validate TCP/UDP connectivity even after MAC change (gateway migration) for egress", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + func(protocol string, addresses *gatewayTestIPs, gwPort, podPort int) { ncCmd := func(sourcePort int, target string) []string { if protocol == "tcp" { - return []string{"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 %s %d", sourcePort, addresses.srcPodIP, target, destPort)} + return []string{"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 %s %d", sourcePort, addresses.srcPodIP, target, gwPort)} } else { - return []string{"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 -u %s %d", sourcePort, addresses.srcPodIP, target, destPort)} + return []string{"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 -u %s %d", sourcePort, addresses.srcPodIP, target, gwPort)} } } if addresses.srcPodIP == "" || addresses.nodeIP == "" { @@ -1481,23 +1584,23 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Checking Ingress connectivity from gateways") // Check Ingress connectivity for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPingPodName, protocol) } // Get hostnames of gateways // map of hostname to gateway - expectedHostNames := make(map[string]string) + expectedHostNames := make(map[string]infraapi.ExternalContainer) gwAddresses := make(map[string]string) for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") + res, err := infraprovider.Get().ExecExternalContainerCommand(c, []string{"hostname"}) framework.ExpectNoError(err, "failed to run hostname in %s", c) hostname := strings.TrimSuffix(res, "\n") - res, err = runCommand(containerRuntime, "exec", c, "hostname", "-I") + res, err = infraprovider.Get().ExecExternalContainerCommand(c, []string{"hostname", "-I"}) framework.ExpectNoError(err, "failed to run 
hostname in %s", c) ips := strings.TrimSuffix(res, "\n") framework.Logf("Hostname for %s is %s, with IP addresses: %s", c, hostname, ips) expectedHostNames[hostname] = c - gwAddresses[c] = ips + gwAddresses[c.Name] = ips } framework.Logf("Expected hostnames are %v", expectedHostNames) @@ -1506,62 +1609,66 @@ var _ = ginkgo.Describe("External Gateway", func() { // https://github.com/ovn-org/ovn-kubernetes/pull/4114#issuecomment-1940916326 // TODO(trozet) change this back to 2 gateways once github actions kernel is updated ginkgo.By(fmt.Sprintf("Reducing to one gateway. Removing gateway: %s", gatewayPodName2)) - err := deletePodWithWaitByName(context.TODO(), f.ClientSet, gatewayPodName2, servingNamespace) + err := e2epod.DeletePodWithWaitByName(context.TODO(), f.ClientSet, gatewayPodName2, servingNamespace) framework.ExpectNoError(err, "failed to delete pod %s/%s", servingNamespace, gatewayPodName2) time.Sleep(1 * time.Second) ginkgo.By("Checking if one of the external gateways are reachable via Egress") - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] sourcePort := 50000 - res, err := e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) - framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + res, err := e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, gwIP)...) + framework.ExpectNoError(err, "failed to reach %s (%s)", gwIP, protocol) hostname := strings.TrimSuffix(res, "\n") - var gateway string + var gateway infraapi.ExternalContainer if g, ok := expectedHostNames[hostname]; !ok { framework.Failf("Unexpected gateway hostname %q, expected; %#v", hostname, expectedHostNames) } else { gateway = g } - - macAddressExGW, err := net.ParseMAC(getMACAddressesForNetwork(gateway, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to find MAC address of hostname: %s", hostname) - framework.Logf("Egress gateway reached: %s, with MAC: %q", gateway, macAddressExGW) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gatewayContainerNetworkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(gateway, network) + framework.ExpectNoError(err, "failed to get network information for gateway container") + framework.Logf("Egress gateway reached: %s, with MAC: %q", gateway, gatewayContainerNetworkInfo.MAC) ginkgo.By("Sending traffic again and verifying packet is received at gateway") tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gateway, srcPingPodName, anyLink, []string{protocol, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) - res, err = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) - framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + go checkReceivedPacketsOnExternalContainer(gateway, srcPingPodName, anyLink, []string{protocol, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) + res, err = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, gwIP)...) 
+ framework.ExpectNoError(err, "failed to reach %s (%s)", gwIP, protocol) hostname2 := strings.TrimSuffix(res, "\n") gomega.Expect(hostname).To(gomega.Equal(hostname2)) tcpDumpSync.Wait() newDummyMac := "02:11:22:33:44:56" - gwLink := "eth0" + ginkgo.By(fmt.Sprintf("Modifying MAC address of gateway %q to simulate migration, new MAC: %s", gateway, newDummyMac)) - defer setMACAddrOnContainer(gateway, macAddressExGW.String(), "eth0") - setMACAddrOnContainer(gateway, newDummyMac, gwLink) + _, err = infraprovider.Get().ExecExternalContainerCommand(gateway, []string{"ip", "link", "set", "dev", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "addr", newDummyMac}) + framework.ExpectNoError(err, "failed to set MAC on external container") + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(gateway, []string{"ip", "link", "set", "dev", + infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "addr", gatewayContainerNetworkInfo.MAC}) + return err + }) ginkgo.By("Sending layer 2 advertisement from external gateway") time.Sleep(1 * time.Second) - // we need to know which gateway IP we are using - var gwAddr string - for _, a := range addresses.gatewayIPs { - if strings.Contains(gwAddresses[gateway], a) { - gwAddr = a - break - } - } - gomega.Expect(gwAddr).To(gomega.Not(gomega.BeEmpty())) - - if utilnet.IsIPv4String(gwAddr) { - sendGARP(gateway, gwAddr, gwLink) + if IsIPv6Cluster(f.ClientSet) { + _, err = infraprovider.Get().ExecExternalContainerCommand(gateway, []string{"ndptool", "-t", "na", "-U", + "-i", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "-T", gatewayContainerNetworkInfo.IPv6, "send"}) } else { - sendNDPAdvertisement(gateway, gwAddr, gwLink) + _, err = infraprovider.Get().ExecExternalContainerCommand(gateway, []string{"arping", "-U", gatewayContainerNetworkInfo.IPv4, + "-I", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "-c", "1", "-s", gatewayContainerNetworkInfo.IPv4}) } + framework.ExpectNoError(err, "arp / nd must succeed") time.Sleep(1 * time.Second) ginkgo.By("Post-Migration: Sending Egress traffic and verify it is received") @@ -1571,20 +1678,21 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gateway, srcPingPodName, gwLink, []string{protocol, "and", "ether", "host", newDummyMac, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gateway, srcPingPodName, infraprovider.Get().ExternalContainerPrimaryInterfaceName(), + []string{protocol, "and", "ether", "host", newDummyMac, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) // Sometimes the external gateway will fail to respond to the request with // SKB_DROP_REASON_NEIGH_FAILED after changing the MAC address. Something breaks with ARP // on the gateway container. Therefore, ignore the reply from gateway, as we only care about the egress // packet arriving with correct MAC address. - _, _ = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) + _, _ = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, gwIP)...) 
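Once the gateway's MAC has been rewritten, its neighbours only learn the new address when the gateway announces it, which is what the hunk above does: arping -U sends a gratuitous ARP for IPv4, ndptool sends an unsolicited neighbour advertisement for IPv6. A minimal sketch of picking that announcement command by address family (the helper and the plain net.ParseIP check are illustrative; the patch itself branches on IsIPv6Cluster, and the arping/ndptool arguments mirror the ones it uses):

// l2AnnounceCmd returns the command run inside the gateway container to
// advertise its (possibly new) MAC for the given IP on iface.
func l2AnnounceCmd(ip, iface string) []string {
	if parsed := net.ParseIP(ip); parsed != nil && parsed.To4() == nil {
		// IPv6: unsolicited neighbour advertisement.
		return []string{"ndptool", "-t", "na", "-U", "-i", iface, "-T", ip, "send"}
	}
	// IPv4: gratuitous ARP sourced from and targeting the same address.
	return []string{"arping", "-U", ip, "-I", iface, "-c", "1", "-s", ip}
}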
tcpDumpSync.Wait() checkAPBExternalRouteStatus(defaultPolicyName) }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgo.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort, podTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort, podTCPPort)) }) // Validate pods can reach a network running in multiple container's loopback @@ -1598,22 +1706,24 @@ var _ = ginkgo.Describe("External Gateway", func() { // The test checks that both hostnames are collected at least once. var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + testTimeout time.Duration = 30 * time.Second + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" ) f := wrappedTestFramework(svcname) - var gwContainers []string - var addressesv4, addressesv6 gatewayTestIPs + var ( + providerCtx infraapi.Context + gwContainers []infraapi.ExternalContainer + addressesv4, addressesv6 gatewayTestIPs + ) ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -1622,17 +1732,21 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, + srcPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) }) ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) deleteAPBExternalRouteCR(defaultPolicyName) }) @@ -1645,14 +1759,18 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Verifying connectivity to the pod from external gateways") for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test 
coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-s", "1420", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } // Verify the gateways and remote loopback addresses are reachable from the pod. @@ -1669,7 +1787,7 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) } pingSync := sync.WaitGroup{} @@ -1681,10 +1799,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } pingSync.Wait() @@ -1695,26 +1813,26 @@ var _ = ginkgo.Describe("External Gateway", func() { // This test runs a listener on the external container, returning the host name both on tcp and udp. // The src pod tries to hit the remote address until both the containers are hit. - ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { + ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, gwPort, podPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) 
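Throughout these hunks the old pattern of one long ping -c <seconds> run, which blocked for the full duration and failed on any transient loss, is replaced by a single-echo ping -c1 -W1 retried by gomega.Eventually until testTimeout (now a time.Duration) expires. A minimal sketch of that retry as a helper, using the same calls the patch already relies on (the helper name is illustrative):

// pingFromPod retries a one-packet, one-second ping from the given pod until
// it succeeds or timeout elapses; non-empty kubectl output means a reply came
// back, and a non-nil error simply makes Eventually try again.
func pingFromPod(namespace, pod, target string, timeout time.Duration) {
	gomega.Eventually(e2ekubectl.RunKubectl).
		WithArguments(namespace, "exec", pod, "--", "ping", "-c1", "-W1", target).
		WithTimeout(timeout).
		ShouldNot(gomega.BeEmpty(), "failed to ping %s from pod %s", target, pod)
}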
for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPodName, protocol) } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) success := false // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -1730,20 +1848,20 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) } - }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), - ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), - ginkgo.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), - ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) + }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", gwUDPPort, podUDPPort), + ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", gwTCPPort, podTCPPort), + ginkgo.Entry("IPV6 udp", &addressesv6, "udp", gwUDPPort, podUDPPort), + ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", gwTCPPort, podTCPPort)) }) var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - srcPodName string = "e2e-exgw-src-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" ) f := wrappedTestFramework(svcname) @@ -1752,23 +1870,30 @@ var _ = ginkgo.Describe("External Gateway", func() { servingNamespace string addressesv4, addressesv6 gatewayTestIPs sleepCommand []string - nodes *v1.NodeList + nodes *corev1.NodeList err error - clientSet kubernetes.Interface + providerCtx infraapi.Context + gwContainers []infraapi.ExternalContainer ) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names - nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), clientSet, 3) + nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { framework.Failf( "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkStr, _, _ := getOverrideNetwork(); overrideNetworkStr != "" { + overrideNetwork, 
err := infraprovider.Get().GetNetwork(overrideNetworkStr) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } @@ -1776,7 +1901,7 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.ExpectNoError(err) servingNamespace = ns.Name - addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + gwContainers, addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, srcPodName) sleepCommand = []string{"bash", "-c", "sleep 20000"} _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"name": gatewayPodName1, "gatewayPod": "true"}) framework.ExpectNoError(err, "Create the external gw pods to manage the src app pod namespace, failed: %v", err) @@ -1785,8 +1910,6 @@ var _ = ginkgo.Describe("External Gateway", func() { }) ginkgo.AfterEach(func() { - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) deleteAPBExternalRouteCR(defaultPolicyName) }) @@ -1796,22 +1919,25 @@ var _ = ginkgo.Describe("External Gateway", func() { } ginkgo.By("Create a static hop in an Admin Policy Based External Route CR targeting the app namespace to get managed by external gateways") createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) 
- klog.Infof("iperf3 command %s", strings.Join(cmd, " ")) - framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + for i, container := range gwContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + _, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{"iperf3", "-u", "-c", addresses.srcPodIP, "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"}) + framework.ExpectNoError(err, "failed to connect to iperf3 server") + networkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(container, network) + framework.ExpectNoError(err, "failed to get network %s info from external container %s", network.Name(), container.Name) + // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInfo.MAC, ":", "", -1), "0") } ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") nodeName := getPod(f, srcPodName).Spec.NodeName @@ -1866,23 +1992,25 @@ var _ = ginkgo.Describe("External Gateway", func() { } createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addresses.gatewayIPs) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - klog.Infof("Run command %+v", cmd) - _, err := runCommand(cmd...) 
- framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + for i, container := range gwContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + _, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{"iperf3", "-u", "-c", addresses.srcPodIP, + "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"}) + framework.ExpectNoError(err, "failed to start iperf3 client command") + networkInterface, err := infraprovider.Get().GetExternalContainerNetworkInterface(container, network) + framework.ExpectNoError(err, "failed to get network %s information for container %s", network.Name(), container.Name) // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInterface.MAC, ":", "", -1), "0") } ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") @@ -1945,31 +2073,29 @@ var _ = ginkgo.Describe("External Gateway", func() { var _ = ginkgo.Describe("e2e non-vxlan external gateway through a dynamic hop", func() { const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - defaultPolicyName = "default-route-policy" + svcname string = "externalgw-pod-novxlan" + gwContainer1Template string = "ex-gw-container1-%d" + gwContainer2Template string = "ex-gw-container2-%d" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ecmpRetry int = 20 + testTimeout time.Duration = 20 * time.Second + defaultPolicyName = "default-route-policy" ) var ( sleepCommand = []string{"bash", "-c", "sleep 20000"} addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface servingNamespace string - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) 
framework.ExpectNoError(err) @@ -1982,17 +2108,23 @@ var _ = ginkgo.Describe("External Gateway", func() { ns, err := f.CreateNamespace(context.TODO(), "exgw-bfd-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, + gwContainer1Template, gwContainer2Template, srcPingPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, true) ginkgo.By("Create the external route policy with dynamic hops to manage the src app pod namespace") - setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + setupPolicyBasedGatewayPods(f, nodes, network, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) }) ginkgo.AfterEach(func() { deleteAPBExternalRouteCR(defaultPolicyName) - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + }) ginkgo.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with dynamic hop", @@ -2005,21 +2137,25 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Verifying connectivity to the pod from external gateways") for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } // This is needed for bfd to sync up for _, gwContainer := range gwContainers { - gomega.Eventually(func() bool { - return isBFDPaired(gwContainer, addresses.nodeIP) - }, time.Minute, 5).Should(gomega.BeTrue(), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). 
+ Should(gomega.BeTrue(), "Bfd not paired") } tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) } // Verify the external gateway loopback address running on the external container is reachable and @@ -2034,10 +2170,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(address) } @@ -2046,12 +2182,14 @@ var _ = ginkgo.Describe("External Gateway", func() { if len(gwContainers) > 1 { ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) + time.Sleep(3 * time.Second) // bfd timeout tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gwContainers[0], srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainers[0], srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) // Verify the external gateway loopback address running on the external container is reachable and // that traffic from the source ping pod is proxied through the pod in the serving namespace @@ -2063,8 +2201,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). 
+ ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) }(t) } pingSync.Wait() @@ -2076,31 +2216,35 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.Entry("ipv6", &addressesv6, "icmp6")) ginkgo.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with a dynamic hop", - func(protocol string, addresses *gatewayTestIPs, destPort int) { + func(protocol string, addresses *gatewayTestIPs, gwPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, true, addressesv4.gatewayIPs) for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } for _, gwContainer := range gwContainers { - gomega.Eventually(func() bool { - return isBFDPaired(gwContainer, addresses.nodeIP) - }, 10, 1).Should(gomega.BeTrue(), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). + Should(gomega.BeTrue(), "Bfd not paired") } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] success := false for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -2118,25 +2262,26 @@ var _ = ginkgo.Describe("External Gateway", func() { if len(gwContainers) > 1 { ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") time.Sleep(3 * time.Second) // bfd timeout // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + expectedHostName := hostNameForExternalContainer(gwContainers[0]) ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) gomega.Expect(expectedHostName).To(gomega.Equal(hostname), "Hostname returned by nc not as expected") } } checkAPBExternalRouteStatus(defaultPolicyName) }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), - ginkgo.Entry("UDP ipv6", "udp", 
&addressesv6, externalUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort)) }) // Validate pods can reach a network running in multiple container's loopback @@ -2150,28 +2295,25 @@ var _ = ginkgo.Describe("External Gateway", func() { // The test checks that both hostnames are collected at least once. var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + testTimeout time.Duration = 30 * time.Second + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" ) var ( - gwContainers []string + gwContainers []infraapi.ExternalContainer + testContainer = fmt.Sprintf("%s-container", srcPodName) + testContainerFlag = fmt.Sprintf("--container=%s", testContainer) + f = wrappedTestFramework(svcname) + providerCtx infraapi.Context + addressesv4, addressesv6 gatewayTestIPs ) - testContainer := fmt.Sprintf("%s-container", srcPodName) - testContainerFlag := fmt.Sprintf("--container=%s", testContainer) - - f := wrappedTestFramework(svcname) - - var addressesv4, addressesv6 gatewayTestIPs - ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { @@ -2179,19 +2321,21 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, + gwContainer2Template, srcPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, true) }) ginkgo.AfterEach(func() { - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) deleteAPBExternalRouteCR(defaultPolicyName) }) @@ -2202,14 +2346,18 @@ var _ = ginkgo.Describe("External Gateway", func() { createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) 
for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } for _, gwContainer := range gwContainers { - gomega.Eventually(func() bool { - return isBFDPaired(gwContainer, addresses.nodeIP) - }, 5).Should(gomega.BeTrue(), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). + Should(gomega.BeTrue(), "Bfd not paired") } // Verify the gateways and remote loopback addresses are reachable from the pod. @@ -2226,7 +2374,7 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) } // spawn a goroutine to asynchronously (to speed up the test) @@ -2241,10 +2389,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } @@ -2252,14 +2400,15 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync.Wait() ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) time.Sleep(3 * time.Second) // bfd timeout pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) - go checkReceivedPacketsOnContainer(gwContainers[0], srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainers[0], srcPodName, anyLink, []string{icmpToDump}, &tcpDumpSync) // spawn a goroutine to asynchronously (to speed up the test) // to ping the gateway loopbacks on both containers via ECMP. @@ -2268,10 +2417,10 @@ var _ = ginkgo.Describe("External Gateway", func() { go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) - } + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c1", "-W1", target). + WithTimeout(testTimeout). 
+ ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", target, srcPodName) }(address) } @@ -2283,34 +2432,37 @@ var _ = ginkgo.Describe("External Gateway", func() { // This test runs a listener on the external container, returning the host name both on tcp and udp. // The src pod tries to hit the remote address until both the containers are hit. - ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + ginkgo.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, gwPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). + WithArguments(gwContainer, []string{"ping", "-c1", "-W1", addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) - for _, gwContainer := range gwContainers { - gomega.Expect(isBFDPaired(gwContainer, addresses.nodeIP)).To(gomega.Equal(true), "Bfd not paired") + gomega.Eventually(isBFDPaired). + WithArguments(gwContainer, addresses.nodeIP). + WithTimeout(time.Minute). + WithPolling(5*time.Second). 
+ Should(gomega.BeTrue(), "Bfd not paired") } - expectedHostNames := hostNamesForContainers(gwContainers) + expectedHostNames := hostNamesForExternalContainers(gwContainers) framework.Logf("Expected hostnames are %v", expectedHostNames) returnedHostNames := make(map[string]struct{}) success := false // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} } @@ -2327,22 +2479,23 @@ var _ = ginkgo.Describe("External Gateway", func() { } ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) + err := providerCtx.DeleteExternalContainer(gwContainers[1]) + framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") time.Sleep(3 * time.Second) // bfd timeout // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + expectedHostName := hostNameForExternalContainer(gwContainers[0]) ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) gomega.Expect(expectedHostName).To(gomega.Equal(hostname), "Hostname returned by nc not as expected") } - }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), - ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), - ginkgo.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), - ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }, ginkgo.Entry("IPV4 udp", &addressesv4, "udp", gwUDPPort), + ginkgo.Entry("IPV4 tcp", &addressesv4, "tcp", gwTCPPort), + ginkgo.Entry("IPV6 udp", &addressesv6, "udp", gwUDPPort), + ginkgo.Entry("IPV6 tcp", &addressesv6, "tcp", gwTCPPort)) }) }) }) @@ -2353,30 +2506,28 @@ var _ = ginkgo.Describe("External Gateway", func() { // The traffic will get proxied through an annotated pod in the serving namespace. 
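The connectivity tables above keep probing the first target IP with nc until every gateway's hostname has been returned; the only difference between the TCP and UDP probes is the -u flag, and -w 1 keeps each attempt to roughly a second. A minimal sketch of how such a probe command is put together (pokeHostnameViaNC is the existing helper; this builder is only illustrative):

// ncHostnamePokeCmd builds the in-pod command used to poke the gateway's
// hostname listener on gwIP:gwPort, switching nc to UDP when asked.
func ncHostnamePokeCmd(protocol, gwIP string, gwPort int) []string {
	nc := fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)
	if protocol == "udp" {
		nc = fmt.Sprintf("echo | nc -w 1 -u %s %d", gwIP, gwPort)
	}
	return []string{"bash", "-c", nc}
}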
var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" + svcname string = "externalgw-pod-novxlan" + gwContainer1Template string = "ex-gw-container1-%d" + gwContainer2Template string = "ex-gw-container2-%d" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ecmpRetry int = 20 + testTimeout time.Duration = 20 * time.Second ) var ( sleepCommand = []string{"bash", "-c", "sleep 20000"} addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface servingNamespace string - gwContainers []string + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -2389,13 +2540,19 @@ var _ = ginkgo.Describe("External Gateway", func() { ns, err := f.CreateNamespace(context.TODO(), "exgw-serving", nil) framework.ExpectNoError(err) servingNamespace = ns.Name - - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) - setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network info") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, providerCtx, nodes, network, + gwContainer1Template, gwContainer2Template, srcPingPodName, gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) + setupAnnotatedGatewayPods(f, nodes, network, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) }) ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) deleteAPBExternalRouteCR(defaultPolicyName) resetGatewayAnnotations(f) }) @@ -2414,14 +2571,16 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) for _, gwContainer := range gwContainers { // Ping from a common IP address that exists on both gateways to ensure test coverage where ingress reply goes back to the same host. - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-I", addresses.targetIPs[0], "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + gomega.Eventually(infraprovider.Get().ExecExternalContainerCommand). 
+ WithArguments(gwContainer, []string{"ping", "-B", "-c1", "-W1", "-I", addresses.targetIPs[0], addresses.srcPodIP}). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer.Name) } tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(len(gwContainers)) for _, gwContainer := range gwContainers { - go checkReceivedPacketsOnContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) + go checkReceivedPacketsOnExternalContainer(gwContainer, srcPingPodName, anyLink, []string{icmpCommand}, &tcpDumpSync) } // Verify the external gateway loopback address running on the external container is reachable and @@ -2430,11 +2589,13 @@ var _ = ginkgo.Describe("External Gateway", func() { pingSync := sync.WaitGroup{} for _, t := range addresses.targetIPs { pingSync.Add(1) - go func(target string) { + go func(gwIP string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + gomega.Eventually(e2ekubectl.RunKubectl). + WithArguments(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c1", "-W1", gwIP). + WithTimeout(testTimeout). + ShouldNot(gomega.BeEmpty(), "Failed to ping remote gateway %s from pod %s", gwIP, srcPingPodName) }(t) } pingSync.Wait() @@ -2445,7 +2606,7 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback "+ "address via a pod when deleting the annotation and supported by a CR with the same gateway IPs", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + func(protocol string, addresses *gatewayTestIPs, gwPort, podPort int) { if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } @@ -2455,12 +2616,12 @@ var _ = ginkgo.Describe("External Gateway", func() { annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) for _, container := range gwContainers { - reachPodFromGateway(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + reachPodFromGateway(container, addresses.srcPodIP, strconv.Itoa(podPort), srcPingPodName, protocol) } expectedHostNames := make(map[string]struct{}) for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") + res, err := infraprovider.Get().ExecExternalContainerCommand(c, []string{"hostname"}) framework.ExpectNoError(err, "failed to run hostname in %s", c) hostname := strings.TrimSuffix(res, "\n") framework.Logf("Hostname for %s is %s", c, hostname) @@ -2470,17 +2631,17 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Checking that external ips are reachable with both gateways") returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] + gwIP := addresses.targetIPs[0] success := false for i := 0; i < 20; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + args = 
append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", gwIP, gwPort)) } res, err := e2ekubectl.RunKubectl(f.Namespace.Name, args...) - framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + framework.ExpectNoError(err, "failed to reach %s (%s)", gwIP, protocol) hostname := strings.TrimSuffix(res, "\n") if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -2498,45 +2659,53 @@ var _ = ginkgo.Describe("External Gateway", func() { } checkAPBExternalRouteStatus(defaultPolicyName) }, - ginkgo.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgo.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + ginkgo.Entry("UDP ipv4", "udp", &addressesv4, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv4", "tcp", &addressesv4, gwTCPPort, podTCPPort), + ginkgo.Entry("UDP ipv6", "udp", &addressesv6, gwUDPPort, podUDPPort), + ginkgo.Entry("TCP ipv6", "tcp", &addressesv6, gwTCPPort, podTCPPort)) }) var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - srcPodName string = "e2e-exgw-src-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" ) var ( servingNamespace string addressesv4, addressesv6 gatewayTestIPs sleepCommand []string - nodes *v1.NodeList + nodes *corev1.NodeList err error - clientSet kubernetes.Interface + gwContainers []infraapi.ExternalContainer + providerCtx infraapi.Context + network infraapi.Network ) f := wrappedTestFramework(svcname) ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach + providerCtx = infraprovider.Get().NewTestContext() // retrieve worker node names - nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), clientSet, 3) + nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { framework.Failf( "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err = infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } @@ -2544,7 +2713,7 @@ var _ = ginkgo.Describe("External Gateway", func() { framework.ExpectNoError(err) servingNamespace = ns.Name - addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + gwContainers, addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, providerCtx, nodes, network, 
gwContainer1Template, gwContainer2Template, srcPodName) sleepCommand = []string{"bash", "-c", "sleep 20000"} _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"gatewayPod": "true"}) framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) @@ -2553,10 +2722,6 @@ var _ = ginkgo.Describe("External Gateway", func() { }) ginkgo.AfterEach(func() { - // tear down the containers and pods simulating the gateways - ginkgo.By("Deleting the gateway containers") - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) deleteAPBExternalRouteCR(defaultPolicyName) resetGatewayAnnotations(f) }) @@ -2568,22 +2733,16 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Annotate the app namespace to get managed by external gateways") annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + for i, container := range gwContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + _, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{"iperf3", "-u", "-c", addresses.srcPodIP, + "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"}) + networkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(container, network) + framework.ExpectNoError(err, "failed to get network %s information for external container %s", network.Name(), container.Name) // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInfo.MAC, ":", "", -1), "0") } nodeName := getPod(f, srcPodName).Spec.NodeName @@ -2641,22 +2800,17 @@ var _ = ginkgo.Describe("External Gateway", func() { } annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) 
- framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { + for i, container := range gwContainers { ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + _, err = infraprovider.Get().ExecExternalContainerCommand(container, []string{"iperf3", "-u", "-c", addresses.srcPodIP, + "-p", fmt.Sprintf("%d", 5201+i), "-b", "1M", "-i", "1", "-t", "3", "&"}) + framework.ExpectNoError(err, "failed to execute iperf client command from external container") + networkInfo, err := infraprovider.Get().GetExternalContainerNetworkInterface(container, network) + framework.ExpectNoError(err, "failed to get network %s information for external container %s", network.Name(), container.Name) // Trim leading 0s because conntrack dumped labels are just integers // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + macAddressGW[i] = strings.TrimLeft(strings.Replace(networkInfo.MAC, ":", "", -1), "0") } ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") @@ -2677,21 +2831,23 @@ var _ = ginkgo.Describe("External Gateway", func() { var _ = ginkgo.Context("When validating the Admin Policy Based External Route status", func() { const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 - duplicatedPolicy = "duplicated" + svcname string = "novxlan-externalgw-ecmp" + gwContainer1Template string = "gw-test-container1-%d" + gwContainer2Template string = "gw-test-container2-%d" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + duplicatedPolicy = "duplicated" ) f := wrappedTestFramework(svcname) - var addressesv4 gatewayTestIPs + var ( + addressesv4 gatewayTestIPs + providerCtx infraapi.Context + ) ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 3 { @@ -2699,16 +2855,21 @@ var _ = ginkgo.Describe("External Gateway", func() { "Test requires >= 3 Ready nodes, but there are only %v nodes", len(nodes.Items)) } - - if externalContainerNetwork == "host" { + network, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network information") + if overrideNetworkName, _, _ := getOverrideNetwork(); overrideNetworkName != "" { + overrideNetwork, err := infraprovider.Get().GetNetwork(overrideNetworkName) + framework.ExpectNoError(err, "over ride network must exist") + network = overrideNetwork + } + if network.Name() == "host" { skipper.Skipf("Skipping as host network doesn't support multiple external gateways") } - _, addressesv4, _ = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + _, addressesv4, _ = setupGatewayContainers(f, providerCtx, nodes, network, gwContainer1Template, gwContainer2Template, srcPodName, + 
gwUDPPort, gwTCPPort, podUDPPort, podTCPPort, ecmpRetry, false) }) ginkgo.AfterEach(func() { - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) deleteAPBExternalRouteCR(defaultPolicyName) deleteAPBExternalRouteCR(duplicatedPolicy) }) @@ -2726,31 +2887,51 @@ var _ = ginkgo.Describe("External Gateway", func() { // setupGatewayContainers sets up external containers, adds routes to the nodes, sets up udp / tcp listeners // that return the container's hostname. // All its needed for namespace / pod gateway tests. -func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, container1, container2, srcPodName string, updPort, tcpPort, numOfIPs int, postCreations ...func(string)) ([]string, gatewayTestIPs, gatewayTestIPs) { - gwContainers := []string{container1, container2} +func setupGatewayContainers(f *framework.Framework, providerCtx infraapi.Context, nodes *corev1.NodeList, network infraapi.Network, container1Template, container2Template, + srcPodName string, gwUDPPort, gwTCPPort, podUDPPort, podHTTPPort, numOfIPs int, setupBFD bool) ([]infraapi.ExternalContainer, gatewayTestIPs, gatewayTestIPs) { + + var err error + externalContainer1 := infraapi.ExternalContainer{Name: getContainerName(container1Template, uint16(gwTCPPort)), + Image: externalContainerImage, Network: network, Args: []string{}, ExtPort: uint16(gwTCPPort)} + externalContainer2 := infraapi.ExternalContainer{Name: getContainerName(container2Template, uint16(gwTCPPort)), + Image: externalContainerImage, Network: network, Args: []string{}, ExtPort: uint16(gwTCPPort)} + + gwContainers := []infraapi.ExternalContainer{externalContainer1, externalContainer2} addressesv4 := gatewayTestIPs{targetIPs: make([]string, 0)} addressesv6 := gatewayTestIPs{targetIPs: make([]string, 0)} ginkgo.By("Creating the gateway containers for the icmp test") - if externalContainerNetwork == "host" { - gwContainers = []string{container1} - ipv4 := net.ParseIP(externalContainerIPv4) - if ipv4 == nil || ipv4.To4() == nil { - framework.Fail(fmt.Sprintf("OVN_TEST_EX_GW_IPV4 is invalid: %s", externalContainerIPv4)) + // for host networked containers, we don't look-up the IP addresses and instead rely on overrides. container engine + // is unable to supply this IP information. + if network.Name() == "host" { + gwContainers = []infraapi.ExternalContainer{externalContainer1} + externalContainer1, err = providerCtx.CreateExternalContainer(externalContainer1) + framework.ExpectNoError(err, "failed to create external container: %s", externalContainer1.String()) + providerCtx.AddCleanUpFn(func() error { + return providerCtx.DeleteExternalContainer(externalContainer1) + }) + overrideNetwork, ipv4, ipv6 := getOverrideNetwork() + gomega.Expect(overrideNetwork).Should(gomega.Equal(network.Name()), "network is 'host' for external container, therefore require host IP information") + // TODO; why do we require both IPs? 
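+ // Note: the check below only enforces that at least one override address (IPv4 or IPv6) is set, not both.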
+ if ipv4 == "" && ipv6 == "" { + framework.Failf("host network is specified therefore, IPs must be defined for the container") } - ipv6 := net.ParseIP(externalContainerIPv6) - if ipv6 == nil || ipv6.To4() != nil { - framework.Fail(fmt.Sprintf("OVN_TEST_EX_GW_IPV6 is invalid: %s", externalContainerIPv6)) + if ipv4 != "" { + addressesv4.gatewayIPs = append(addressesv4.gatewayIPs, ipv4) + } + if ipv6 != "" { + addressesv6.gatewayIPs = append(addressesv6.gatewayIPs, ipv6) } - - _, _ = createClusterExternalContainer(gwContainers[0], externalContainerImage, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) - addressesv4.gatewayIPs = append(addressesv4.gatewayIPs, externalContainerIPv4) - addressesv6.gatewayIPs = append(addressesv6.gatewayIPs, externalContainerIPv6) } else { - for _, gwContainer := range gwContainers { - gwipv4, gwipv6 := createClusterExternalContainer(gwContainer, externalContainerImage, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) - addressesv4.gatewayIPs = append(addressesv4.gatewayIPs, gwipv4) - addressesv6.gatewayIPs = append(addressesv6.gatewayIPs, gwipv6) + for i, gwContainer := range gwContainers { + gwContainers[i], err = providerCtx.CreateExternalContainer(gwContainer) + framework.ExpectNoError(err, "failed to create external container: %s", gwContainer.String()) + if gwContainers[i].GetIPv4() != "" { + addressesv4.gatewayIPs = append(addressesv4.gatewayIPs, gwContainers[i].GetIPv4()) + } + if gwContainers[i].GetIPv6() != "" { + addressesv6.gatewayIPs = append(addressesv6.gatewayIPs, gwContainers[i].GetIPv6()) + } } } @@ -2769,14 +2950,16 @@ func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, containe node := nodes.Items[0] // we must use container network for second bridge scenario // for host network we can use the node's ip - if externalContainerNetwork != "host" { - addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) + if network.Name() != "host" { + nodeInf, err := infraprovider.Get().GetK8NodeNetworkInterface(node.Name, network) + framework.ExpectNoError(err, "failed to get network interface info for an interface on network %s within node %s", network.Name(), node.Name) + addressesv4.nodeIP, addressesv6.nodeIP = nodeInf.IPv4, nodeInf.IPv6 } else { - nodeList := &v1.NodeList{} + nodeList := &corev1.NodeList{} nodeList.Items = append(nodeList.Items, node) - addressesv4.nodeIP = e2enode.FirstAddressByTypeAndFamily(nodeList, v1.NodeInternalIP, v1.IPv4Protocol) - addressesv6.nodeIP = e2enode.FirstAddressByTypeAndFamily(nodeList, v1.NodeInternalIP, v1.IPv6Protocol) + addressesv4.nodeIP = e2enode.FirstAddressByTypeAndFamily(nodeList, corev1.NodeInternalIP, corev1.IPv4Protocol) + addressesv6.nodeIP = e2enode.FirstAddressByTypeAndFamily(nodeList, corev1.NodeInternalIP, corev1.IPv6Protocol) } framework.Logf("the pod side node is %s and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) @@ -2785,10 +2968,10 @@ func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, containe args := []string{ "netexec", - fmt.Sprintf("--http-port=%d", srcHTTPPort), - fmt.Sprintf("--udp-port=%d", srcUDPPort), + fmt.Sprintf("--http-port=%d", podHTTPPort), + fmt.Sprintf("--udp-port=%d", podUDPPort), } - clientPod, err := createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { + clientPod, err := createPod(f, srcPodName, node.Name, f.Namespace.Name, 
[]string{}, map[string]string{}, func(p *corev1.Pod) { p.Spec.Containers[0].Args = args }) @@ -2815,60 +2998,83 @@ func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, containe } // This sets up a listener that replies with the hostname, both on tcp and on udp - setupListenersOrDie := func(container, address string) { - cmd := []string{containerRuntime, "exec", container, "bash", "-c", fmt.Sprintf("while true; do echo $(hostname) | nc -l -u %s %d; done &", address, updPort)} - _, err = runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup UDP listener for %s on %s", address, container) - - cmd = []string{containerRuntime, "exec", container, "bash", "-c", fmt.Sprintf("while true; do echo $(hostname) | nc -l %s %d; done &", address, tcpPort)} - _, err = runCommand(cmd...) - framework.ExpectNoError(err, "failed to setup TCP listener for %s on %s", address, container) + setupListenersOrDie := func(container infraapi.ExternalContainer, gwAddress string) { + _, err = infraprovider.Get().ExecExternalContainerCommand(container, + []string{"bash", "-c", fmt.Sprintf("while true; do echo $(hostname) | nc -l -u %s %d; done &", gwAddress, gwUDPPort)}) + framework.ExpectNoError(err, "failed to setup UDP listener for %s on %s", gwAddress, container) + + _, err = infraprovider.Get().ExecExternalContainerCommand(container, + []string{"bash", "-c", fmt.Sprintf("while true; do echo $(hostname) | nc -l %s %d; done &", gwAddress, gwTCPPort)}) + framework.ExpectNoError(err, "failed to setup TCP listener for %s on %s", gwAddress, container) } // The target ips are addresses added to the lo of each container. // By setting the gateway annotation and using them as destination, we verify that // the routing is able to reach the containers. // A route back to the src pod must be set in order for the ping reply to work. 
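+ // In outline, the loop below runs the following on each gateway container (IPv4 case; the IPv6 case uses /128 prefixes and `ip -6 route`):
+ //   ip address add <targetIP>/32 dev lo
+ //   ip route add <srcPodIP> via <nodeIP>
+ //   ping -c1 <nodeIP>                                             # prime the neighbor table
+ //   while true; do echo $(hostname) | nc -l -u <targetIP> <gwUDPPort>; done &
+ //   while true; do echo $(hostname) | nc -l <targetIP> <gwTCPPort>; done &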
- for _, containerName := range gwContainers { + for _, gwContainer := range gwContainers { if testIPv4 { - ginkgo.By(fmt.Sprintf("Setting up the destination ips to %s", containerName)) + ginkgo.By(fmt.Sprintf("Setting up the destination ips to %s", gwContainer.Name)) for _, address := range addressesv4.targetIPs { - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "address", "add", address+"/32", "dev", "lo") - framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the test container %s", containerName) + framework.Logf("adding IP %q to gateway container %q", address, gwContainer.Name) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "address", "add", address + "/32", "dev", "lo"}) + framework.ExpectNoError(err, "failed to add the loopback ip to dev lo on the test container %s", gwContainer.Name) + providerCtx.AddCleanUpFn(func() error { + infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "address", "del", address + "/32", "dev", "lo"}) + return nil + }) } - ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod", containerName)) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "route", "add", addressesv4.srcPodIP, "via", addressesv4.nodeIP) - framework.ExpectNoError(err, "failed to add the pod host route on the test container %s", containerName) + ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod", gwContainer.Name)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "route", "add", addressesv4.srcPodIP, "via", addressesv4.nodeIP}) + framework.ExpectNoError(err, "failed to add the pod host route on the test container %s", gwContainer.Name) + providerCtx.AddCleanUpFn(func() error { + infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "route", "del", addressesv4.srcPodIP, "via", addressesv4.nodeIP}) + return nil + }) + + // cluster nodes don't know where to send ARP replies to requests + // from the IPs that we just added to the containers so force an + // entry on the neighbor table with a ping. This speeds up the tests + // which would otherwise eventually discover the neighbor through + // other link layer protocols. 
+ ginkgo.By(fmt.Sprintf("Adding node %s as neighbor of %s", addressesv4.nodeIP, gwContainer.Name)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ping", "-c1", addressesv4.nodeIP}) + framework.ExpectNoError(err, "failed to add node %s as neighbor of %s", addressesv4.nodeIP, gwContainer.Name) ginkgo.By("Setting up the listeners on the gateway") - setupListenersOrDie(containerName, addressesv4.targetIPs[0]) + setupListenersOrDie(gwContainer, addressesv4.targetIPs[0]) } if testIPv6 { - ginkgo.By(fmt.Sprintf("Setting up the destination ips to %s (ipv6)", containerName)) + ginkgo.By(fmt.Sprintf("Setting up the destination ips to %s (ipv6)", gwContainer.Name)) for _, address := range addressesv6.targetIPs { - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "address", "add", address+"/128", "dev", "lo") - framework.ExpectNoError(err, "ipv6: failed to add the loopback ip to dev lo on the test container %s", containerName) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "address", "add", address + "/128", "dev", "lo"}) + framework.ExpectNoError(err, "ipv6: failed to add the loopback ip to dev lo on the test container %s", gwContainer.Name) + providerCtx.AddCleanUpFn(func() error { + infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "address", "del", address + "/128", "dev", "lo"}) + return nil + }) } - ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod (ipv6)", containerName)) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "-6", "route", "add", addressesv6.srcPodIP, "via", addressesv6.nodeIP) - framework.ExpectNoError(err, "ipv6: failed to add the pod host route on the test container %s", containerName) + ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod (ipv6)", gwContainer.Name)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwContainer, []string{"ip", "-6", "route", "add", addressesv6.srcPodIP, "via", addressesv6.nodeIP}) + framework.ExpectNoError(err, "ipv6: failed to add the pod host route on the test container %s", gwContainer.Name) ginkgo.By("Setting up the listeners on the gateway (v6)") - setupListenersOrDie(containerName, addressesv6.targetIPs[0]) + setupListenersOrDie(gwContainer, addressesv6.targetIPs[0]) } } - for _, containerName := range gwContainers { - for _, postCreation := range postCreations { - postCreation(containerName) + if setupBFD { + for _, gwContainer := range gwContainers { + setupBFDOnExternalContainer(network, gwContainer, nodes.Items) } } + return gwContainers, addressesv4, addressesv6 } -func setupAnnotatedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { +func setupAnnotatedGatewayPods(f *framework.Framework, nodes *corev1.NodeList, network infraapi.Network, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { gwPods := []string{pod1, pod2} - if externalContainerNetwork == "host" { + if network.Name() == "host" { gwPods = []string{pod1} } @@ -2898,9 +3104,9 @@ func setupAnnotatedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, return gwPods } -func setupPolicyBasedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs) []string { +func setupPolicyBasedGatewayPods(f *framework.Framework, nodes *corev1.NodeList, network infraapi.Network, pod1, pod2, ns 
string, cmd []string, addressesv4, addressesv6 gatewayTestIPs) []string { gwPods := []string{pod1, pod2} - if externalContainerNetwork == "host" { + if network.Name() == "host" { gwPods = []string{pod1} } @@ -2923,39 +3129,48 @@ func setupPolicyBasedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod return gwPods } -func cleanExGWContainers(clientSet kubernetes.Interface, gwContainers []string, addressesv4, addressesv6 gatewayTestIPs) { - ginkgo.By("Deleting the gateway containers") - if externalContainerNetwork == "host" { - cleanRoutesAndIPs(gwContainers[0], addressesv4, addressesv6) - deleteClusterExternalContainer(gwContainers[0]) - } else { - for _, container := range gwContainers { - deleteClusterExternalContainer(container) - } - } -} - // setupGatewayContainersForConntrackTest sets up iperf3 external containers, adds routes to src // pods via the nodes, starts up iperf3 server on src-pod -func setupGatewayContainersForConntrackTest(f *framework.Framework, nodes *v1.NodeList, gwContainer1, gwContainer2, srcPodName string) (gatewayTestIPs, gatewayTestIPs) { +func setupGatewayContainersForConntrackTest(f *framework.Framework, providerCtx infraapi.Context, nodes *corev1.NodeList, network infraapi.Network, + gwContainer1Template, gwContainer2Template string, srcPodName string) ([]infraapi.ExternalContainer, gatewayTestIPs, gatewayTestIPs) { + var ( err error - clientPod *v1.Pod + clientPod *corev1.Pod ) + if network.Name() == "host" { + panic("not supported") + } addressesv4 := gatewayTestIPs{gatewayIPs: make([]string, 2)} addressesv6 := gatewayTestIPs{gatewayIPs: make([]string, 2)} ginkgo.By("Creating the gateway containers for the UDP test") - addressesv4.gatewayIPs[0], addressesv6.gatewayIPs[0] = createClusterExternalContainer(gwContainer1, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) - addressesv4.gatewayIPs[1], addressesv6.gatewayIPs[1] = createClusterExternalContainer(gwContainer2, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) - + gwExternalContainer1 := infraapi.ExternalContainer{Name: getContainerName(gwContainer1Template, 12345), + Image: images.IPerf3(), Network: network, Args: []string{}, ExtPort: 12345} + gwExternalContainer1, err = providerCtx.CreateExternalContainer(gwExternalContainer1) + framework.ExpectNoError(err, "failed to create external container (%s)", gwExternalContainer1) + + gwExternalContainer2 := infraapi.ExternalContainer{Name: getContainerName(gwContainer2Template, 12345), + Image: images.IPerf3(), Network: network, Args: []string{}, ExtPort: 12345} + gwExternalContainer2, err = providerCtx.CreateExternalContainer(gwExternalContainer2) + framework.ExpectNoError(err, "failed to create external container (%s)", gwExternalContainer2) + if network.Name() == "host" { + // manually cleanup because cleanup doesnt cleanup host network + providerCtx.AddCleanUpFn(func() error { + return providerCtx.DeleteExternalContainer(gwExternalContainer2) + }) + } + addressesv4.gatewayIPs[0], addressesv6.gatewayIPs[0] = gwExternalContainer1.GetIPv4(), gwExternalContainer1.GetIPv6() + addressesv4.gatewayIPs[1], addressesv6.gatewayIPs[1] = gwExternalContainer2.GetIPv4(), gwExternalContainer2.GetIPv6() + gwExternalContainers := []infraapi.ExternalContainer{gwExternalContainer1, gwExternalContainer2} node := nodes.Items[0] ginkgo.By("Creating the source pod to reach the destination ips from") - clientPod, err = createPod(f, srcPodName, node.Name, f.Namespace.Name, 
[]string{}, map[string]string{}, func(p *v1.Pod) { - p.Spec.Containers[0].Image = iperf3Image + clientPod, err = createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = images.IPerf3() }) framework.ExpectNoError(err) - - addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) + networkInfo, err := infraprovider.Get().GetK8NodeNetworkInterface(node.Name, network) + framework.ExpectNoError(err, "failed to get kubernetes node %s network information for network %s", node.Name, network.Name()) + addressesv4.nodeIP, addressesv6.nodeIP = networkInfo.IPv4, networkInfo.IPv6 framework.Logf("the pod side node is %s and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) // start iperf3 servers at ports 5201 and 5202 on the src app pod @@ -2984,34 +3199,49 @@ func setupGatewayContainersForConntrackTest(f *framework.Framework, nodes *v1.No } // A route back to the src pod must be set in order for the ping reply to work. - for _, containerName := range []string{gwContainer1, gwContainer2} { - ginkgo.By(fmt.Sprintf("Install iproute in %s", containerName)) - _, err = runCommand(containerRuntime, "exec", containerName, "dnf", "install", "-y", "iproute") - framework.ExpectNoError(err, "failed to install iproute package on the test container %s", containerName) + for _, gwExternalContainer := range gwExternalContainers { + ginkgo.By(fmt.Sprintf("Install iproute in %s", gwExternalContainer.Name)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwExternalContainer, []string{"dnf", "install", "-y", "iproute"}) + framework.ExpectNoError(err, "failed to install iproute package on the test container %s", gwExternalContainer.Name) if testIPv4 { - ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod with IP %s", containerName, addressesv4.srcPodIP)) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "route", "add", addressesv4.srcPodIP, "via", addressesv4.nodeIP, "dev", "eth0") - framework.ExpectNoError(err, "failed to add the pod host route on the test container %s", containerName) + ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod with IP %s", gwExternalContainer.Name, addressesv4.srcPodIP)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwExternalContainer, []string{"ip", "-4", "route", "add", addressesv4.srcPodIP, + "via", addressesv4.nodeIP, "dev", infraprovider.Get().ExternalContainerPrimaryInterfaceName()}) + framework.ExpectNoError(err, "failed to add the pod host route on the test container %s", gwExternalContainer.Name) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(gwExternalContainer, []string{"ip", "-4", "route", "del", addressesv4.srcPodIP, + "via", addressesv4.nodeIP, "dev", infraprovider.Get().ExternalContainerPrimaryInterfaceName()}) + if err != nil { + return fmt.Errorf("failed to remove IPv4 route from external container %s: %v", gwExternalContainer.Name, err) + } + return nil + }) } if testIPv6 { - ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod (ipv6)", containerName)) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", "-6", "route", "add", addressesv6.srcPodIP, "via", addressesv6.nodeIP) - framework.ExpectNoError(err, "ipv6: failed to add the pod host route on the test container %s", containerName) + ginkgo.By(fmt.Sprintf("Adding a route from %s to the src pod (ipv6)", 
gwExternalContainer.Name)) + _, err = infraprovider.Get().ExecExternalContainerCommand(gwExternalContainer, []string{"ip", "-6", "route", "add", addressesv6.srcPodIP, "via", addressesv6.nodeIP}) + framework.ExpectNoError(err, "ipv6: failed to add the pod host route on the test container %s", gwExternalContainer) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecExternalContainerCommand(gwExternalContainer, []string{"ip", "-6", "route", "del", addressesv6.srcPodIP, "via", addressesv6.nodeIP}) + if err != nil { + return fmt.Errorf("failed to delete IPv6 route from external container %s: %v", gwExternalContainer.Name, err) + } + return nil + }) } } - return addressesv4, addressesv6 + return gwExternalContainers, addressesv4, addressesv6 } -func reachPodFromGateway(targetAddress, targetPort, targetPodName, srcContainer, protocol string) { +func reachPodFromGateway(srcContainer infraapi.ExternalContainer, targetAddress, targetPort, targetPodName, protocol string) { ginkgo.By(fmt.Sprintf("Checking that %s can reach the pod", srcContainer)) - dockerCmd := []string{containerRuntime, "exec", srcContainer, "bash", "-c"} + var cmd []string if protocol == "tcp" { - dockerCmd = append(dockerCmd, fmt.Sprintf("curl -s http://%s/hostname", net.JoinHostPort(targetAddress, targetPort))) + cmd = []string{"curl", "-s", fmt.Sprintf("http://%s/hostname", net.JoinHostPort(targetAddress, targetPort))} } else { - dockerCmd = append(dockerCmd, fmt.Sprintf("cat <(echo hostname) <(sleep 1) | nc -u %s %s", targetAddress, targetPort)) + cmd = []string{"bash", "-c", "cat <(echo hostname) <(sleep 1) | nc -u " + targetAddress + " " + targetPort} } - - res, err := runCommand(dockerCmd...) + res, err := infraprovider.Get().ExecExternalContainerCommand(srcContainer, cmd) framework.ExpectNoError(err, "Failed to reach pod %s (%s) from external container %s", targetAddress, protocol, srcContainer) gomega.Expect(strings.Trim(res, "\n")).To(gomega.Equal(targetPodName)) } @@ -3224,23 +3454,23 @@ func formatDynamicHops(bfd bool, servingNamespace string) string { return b.String() } -func getGatewayPod(f *framework.Framework, podNamespace, podName string) *v1.Pod { +func getGatewayPod(f *framework.Framework, podNamespace, podName string) *corev1.Pod { pod, err := f.ClientSet.CoreV1().Pods(podNamespace).Get(context.Background(), podName, metav1.GetOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to get pod: %s, err: %v", podName, err)) return pod } -func hostNamesForContainers(containers []string) map[string]struct{} { +func hostNamesForExternalContainers(containers []infraapi.ExternalContainer) map[string]struct{} { res := make(map[string]struct{}) for _, c := range containers { - hostName := hostNameForContainer(c) + hostName := hostNameForExternalContainer(c) res[hostName] = struct{}{} } return res } -func hostNameForContainer(container string) string { - res, err := runCommand(containerRuntime, "exec", container, "hostname") +func hostNameForExternalContainer(container infraapi.ExternalContainer) string { + res, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"hostname"}) framework.ExpectNoError(err, "failed to run hostname in %s", container) framework.Logf("Hostname for %s is %s", container, res) return strings.TrimSuffix(res, "\n") @@ -3262,10 +3492,11 @@ func pokeHostnameViaNC(podName, namespace, protocol, target string, port int) st // pokeConntrackEntries returns the number of conntrack entries that match the provided pattern, protocol and podIP func 
pokeConntrackEntries(nodeName, podIP, protocol string, patterns []string) int { args := []string{"get", "pods", "--selector=app=ovs-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", nodeName), "-o", "jsonpath={.items..metadata.name}"} - ovsPodName, err := e2ekubectl.RunKubectl(ovnNamespace, args...) + ovnKubernetesNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + ovsPodName, err := e2ekubectl.RunKubectl(ovnKubernetesNamespace, args...) framework.ExpectNoError(err, "failed to get the ovs pod on node %s", nodeName) args = []string{"exec", ovsPodName, "--", "ovs-appctl", "dpctl/dump-conntrack"} - conntrackEntries, err := e2ekubectl.RunKubectl(ovnNamespace, args...) + conntrackEntries, err := e2ekubectl.RunKubectl(ovnKubernetesNamespace, args...) framework.ExpectNoError(err, "failed to get the conntrack entries from node %s", nodeName) numOfConnEntries := 0 for _, connEntry := range strings.Split(conntrackEntries, "\n") { @@ -3286,30 +3517,31 @@ func pokeConntrackEntries(nodeName, podIP, protocol string, patterns []string) i return numOfConnEntries } -func setupBFDOnContainer(nodes []v1.Node) func(string) { - return func(containerName string) { - // we set a bfd peer for each address of each node - for _, node := range nodes { - // we must use container network for second bridge scenario - // for host network we can use the node's ip - var ipv4, ipv6 string - if externalContainerNetwork != "host" { - ipv4, ipv6 = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) - } else { - nodeList := &v1.NodeList{} - nodeList.Items = append(nodeList.Items, node) +func setupBFDOnExternalContainer(network infraapi.Network, container infraapi.ExternalContainer, nodes []corev1.Node) { + // we set a bfd peer for each address of each node + for _, node := range nodes { + // we must use container network for second bridge scenario + // for host network we can use the node's ip + var ipv4, ipv6 string + if network.Name() != "host" { + networkInfo, err := infraprovider.Get().GetK8NodeNetworkInterface(node.Name, network) + framework.ExpectNoError(err, "failed to get network information from node %s for network %s", node.Name, network.Name()) + ipv4, ipv6 = networkInfo.IPv4, networkInfo.IPv6 + } else { + nodeList := &corev1.NodeList{} + nodeList.Items = append(nodeList.Items, node) + + ipv4 = e2enode.FirstAddressByTypeAndFamily(nodeList, corev1.NodeInternalIP, corev1.IPv4Protocol) + ipv6 = e2enode.FirstAddressByTypeAndFamily(nodeList, corev1.NodeInternalIP, corev1.IPv6Protocol) + } - ipv4 = e2enode.FirstAddressByTypeAndFamily(nodeList, v1.NodeInternalIP, v1.IPv4Protocol) - ipv6 = e2enode.FirstAddressByTypeAndFamily(nodeList, v1.NodeInternalIP, v1.IPv6Protocol) + for _, a := range []string{ipv4, ipv6} { + if a == "" { + continue } - - for _, a := range []string{ipv4, ipv6} { - if a == "" { - continue - } - // Configure the node as a bfd peer on the frr side - cmd := []string{containerRuntime, "exec", containerName, "bash", "-c", - fmt.Sprintf(`cat << EOF >> /etc/frr/frr.conf + // Configure the node as a bfd peer on the frr side + _, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"bash", "-c", + fmt.Sprintf(`cat << EOF >> /etc/frr/frr.conf bfd peer %s @@ -3317,70 +3549,29 @@ bfd ! ! EOF -`, a)} - _, err := runCommand(cmd...) 
- framework.ExpectNoError(err, "failed to setup FRR peer %s in %s", a, containerName) - } +`, a)}) + framework.ExpectNoError(err, "failed to setup FRR peer %s in %s", a, container) } - cmd := []string{containerRuntime, "exec", containerName, "/usr/libexec/frr/frrinit.sh", "start"} - _, err := runCommand(cmd...) - framework.ExpectNoError(err, "failed to start frr in %s", containerName) } + _, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"/usr/libexec/frr/frrinit.sh", "start"}) + framework.ExpectNoError(err, "failed to start frr in %s", container) } -func isBFDPaired(container, peer string) bool { - res, err := runCommand(containerRuntime, "exec", container, "bash", "-c", fmt.Sprintf("vtysh -c \"show bfd peer %s\"", peer)) - framework.ExpectNoError(err, "failed to check bfd status in %s", container) - return strings.Contains(res, "Status: up") -} - -// When running on host network we clean the routes and ips we added previously -func cleanRoutesAndIPs(containerName string, addressesv4, addressesv6 gatewayTestIPs) { - testIPv6 := addressesv6.srcPodIP != "" && addressesv6.nodeIP != "" - testIPv4 := addressesv4.srcPodIP != "" && addressesv4.nodeIP != "" - - if testIPv4 { - cleanRoutesAndIPsForFamily(containerName, 4, addressesv4) +func isBFDPaired(container infraapi.ExternalContainer, peer string) (bool, error) { + res, err := infraprovider.Get().ExecExternalContainerCommand(container, []string{"bash", "-c", fmt.Sprintf("vtysh -c \"show bfd peer %s\"", peer)}) + if err != nil { + return false, fmt.Errorf("failed to check bfd status in %s: %w", container, err) } - if testIPv6 { - cleanRoutesAndIPsForFamily(containerName, 6, addressesv6) - } -} - -func cleanRoutesAndIPsForFamily(containerName string, family int, addresses gatewayTestIPs) { - var err error - var addressMask string - addressSelector := fmt.Sprintf("-%d", family) - mask := 32 - if family == 6 { - mask = 128 - } - ginkgo.By(fmt.Sprintf("Removing the destination ips from %s (IPv%d)", containerName, family)) - for _, address := range addresses.targetIPs { - addressMask = fmt.Sprintf("%s/%d", address, mask) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", addressSelector, "address", "del", addressMask, "dev", "lo") - framework.ExpectNoError(err, "failed to del the loopback ip %s from dev lo on the test container %s", addressMask, containerName) - } - - ginkgo.By(fmt.Sprintf("Removing the route from %s to the src pod (IPv%d)", containerName, family)) - _, err = runCommand(containerRuntime, "exec", containerName, "ip", addressSelector, "route", "del", addresses.srcPodIP, "via", addresses.nodeIP) - framework.ExpectNoError(err, "failed to del the pod host route on the test container %s", containerName) + return strings.Contains(res, "Status: up"), nil } -func setMACAddrOnContainer(containerName, addr, link string) { - _, err := runCommand(containerRuntime, "exec", containerName, "ip", "link", "set", "dev", link, "addr", addr) - framework.ExpectNoError(err, "failed to set MAC address on container %s", containerName) -} - -func checkReceivedPacketsOnContainer(container, srcPodName, link string, filter []string, wg *sync.WaitGroup) { +func checkReceivedPacketsOnExternalContainer(container infraapi.ExternalContainer, srcPodName, link string, filter []string, wg *sync.WaitGroup) { defer ginkgo.GinkgoRecover() defer wg.Done() if len(link) == 0 { link = anyLink } - args := []string{containerRuntime, "exec", container, "timeout", "60", "tcpdump", "-c", "1", "-i", link} - args = append(args, 
filter...) - _, err := runCommand(args...) + _, err := infraprovider.Get().ExecExternalContainerCommand(container, append([]string{"timeout", "60", "tcpdump", "-c", "1", "-i", link}, filter...)) framework.ExpectNoError(err, "Failed to detect packets from %s on gateway %s", srcPodName, container) framework.Logf("Packet successfully detected on gateway %s", container) } @@ -3403,17 +3594,3 @@ func resetGatewayAnnotations(f *framework.Framework) { annotation}...) } } - -func sendGARP(container, ip, link string) { - ginkgo.By(fmt.Sprintf("Sending ARP on container %s, for IP: %s", container, ip)) - args := []string{containerRuntime, "exec", container, "arping", "-U", ip, "-I", link, "-c", "1", "-s", ip} - _, err := runCommand(args...) - framework.ExpectNoError(err, "Failed to send arping on container: %s", container) -} - -func sendNDPAdvertisement(container, ip, link string) { - ginkgo.By(fmt.Sprintf("Sending NDP Unsolicited NA on container %s, for IP: %s", container, ip)) - args := []string{containerRuntime, "exec", container, "ndptool", "-t", "na", "-U", "-i", link, "-T", ip, "send"} - _, err := runCommand(args...) - framework.ExpectNoError(err, "Failed to send NDP unsolicted advertisement on container: %s", container) -} diff --git a/test/e2e/go.mod b/test/e2e/go.mod index c267a34ba2..6a865f71ee 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -175,7 +175,6 @@ require ( k8s.io/cri-client v0.32.3 // indirect k8s.io/csi-translation-lib v0.32.3 // indirect k8s.io/dynamic-resource-allocation v0.32.3 // indirect - k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kms v0.32.3 // indirect k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect k8s.io/kube-scheduler v0.32.3 // indirect @@ -201,6 +200,7 @@ require ( go.universe.tf/metallb v0.0.0-00010101000000-000000000000 google.golang.org/grpc v1.65.0 gopkg.in/yaml.v2 v2.4.0 + k8s.io/klog/v2 v2.130.1 k8s.io/kubectl v0.32.3 kubevirt.io/api v1.4.0 sigs.k8s.io/controller-runtime v0.20.3 diff --git a/test/e2e/images/images.go b/test/e2e/images/images.go new file mode 100644 index 0000000000..472705dddd --- /dev/null +++ b/test/e2e/images/images.go @@ -0,0 +1,25 @@ +package images + +import "os" + +var ( + agnHost = "registry.k8s.io/e2e-test-images/agnhost:2.53" + iperf3 = "quay.io/sronanrh/iperf:latest" +) + +func init() { + if agnHostOverride := os.Getenv("AGNHOST_IMAGE"); agnHostOverride != "" { + agnHost = agnHostOverride + } + if iperf3Override := os.Getenv("IPERF3_IMAGE"); iperf3Override != "" { + iperf3 = iperf3Override + } +} + +func AgnHost() string { + return agnHost +} + +func IPerf3() string { + return iperf3 +} diff --git a/test/e2e/infraprovider/README.md b/test/e2e/infraprovider/README.md new file mode 100644 index 0000000000..6a8be5df14 --- /dev/null +++ b/test/e2e/infraprovider/README.md @@ -0,0 +1,19 @@ +# Infra Provider +Infra Provider provides test dependencies using an infrastructure agnostic API. + +## Motivation +Previous to this API, our tests relied directly on upstream "KinD" to provision networks and launch external hosts. +This prevented downstream consumption of upstream tests. + +## Description +Providers external to the cluster resources including adding external hosts [1] and provisioning networks, +attaching networks, etc. + +[1] deployed as containers on KinD provider but may be deployed as host-networked container downstream. 
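Editorial note (not part of the patch): the README above introduces the provider API added in this change. As a rough usage sketch, a test could provision and exercise an external host through that API along the lines below. The function and container names here are made up for illustration, error handling is abbreviated, and the code must run inside a ginkgo test so that NewTestContext can register its cleanup.

```go
package example

import (
	"fmt"

	"github.com/ovn-org/ovn-kubernetes/test/e2e/images"
	"github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider"
	infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api"
)

// launchExternalHost sketches how a test might create an external host on the
// provider's primary network and run a command in it, without referencing
// KinD or a container runtime directly.
func launchExternalHost() error {
	provider := infraprovider.Get() // panics if infraprovider.Set was never called
	network, err := provider.PrimaryNetwork()
	if err != nil {
		return err
	}
	// Per-test sandbox; resources created through it are cleaned up automatically.
	ctx := provider.NewTestContext()
	ec := infraapi.ExternalContainer{
		Name:    "example-ext-gw", // hypothetical name, for illustration only
		Image:   images.AgnHost(),
		Network: network,
		ExtPort: provider.GetExternalContainerPort(),
	}
	ec, err = ctx.CreateExternalContainer(ec)
	if err != nil {
		return err
	}
	out, err := provider.ExecExternalContainerCommand(ec, []string{"hostname"})
	if err != nil {
		return err
	}
	fmt.Printf("external host %s (%s/%s) reports hostname %q\n",
		ec.Name, ec.GetIPv4(), ec.GetIPv6(), out)
	return nil
}
```

Because the test only talks to the api.Provider interface, the same code can be backed by the KinD implementation upstream or by a different provider downstream.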
+ +Known implementations: +- KinD +- OpenShift + + + diff --git a/test/e2e/infraprovider/api/api.go b/test/e2e/infraprovider/api/api.go new file mode 100644 index 0000000000..5ef104b7f3 --- /dev/null +++ b/test/e2e/infraprovider/api/api.go @@ -0,0 +1,273 @@ +package api + +import ( + "errors" + "fmt" + "strings" +) + +// Provider represents the infrastructure provider +type Provider interface { + // Name returns the name of the provider, example 'kind'. + Name() string + // NewTestContext providers a per test sandbox. Dependent infra test constructs are created within each test and automatically cleaned + // after each test. + NewTestContext() Context + + // PrimaryNetwork returns OVN-Kubernetes primary infrastructure network information + PrimaryNetwork() (Network, error) + // GetNetwork returns a network + GetNetwork(name string) (Network, error) + // GetExternalContainerNetworkInterface fetches network interface information from the external container attached to a specific network + GetExternalContainerNetworkInterface(container ExternalContainer, network Network) (NetworkInterface, error) + GetK8NodeNetworkInterface(instance string, network Network) (NetworkInterface, error) + + // ExecK8NodeCommand executes a command on a K8 Node host network namespace and filesystem + ExecK8NodeCommand(nodeName string, cmd []string) (string, error) + ExecExternalContainerCommand(container ExternalContainer, cmd []string) (string, error) + GetExternalContainerLogs(container ExternalContainer) (string, error) + // GetExternalContainerPort returns a port. Requesting a port that maybe exposed in tests to avoid multiple parallel + // tests utilizing conflicting ports. It also allows infra provider implementations to set the external containers + // allowed port range and therefore comply with cloud provider firewall rules. + GetExternalContainerPort() uint16 + ExternalContainerPrimaryInterfaceName() string + // GetK8HostPort returns a Node port. Requesting a port that maybe exposed in tests to avoid multiple parallel + // tests utilizing conflicting ports. It also allows infra provider implementations to set Nodes + // allowed port range and therefore comply with cloud provider firewall rules. 
+ GetK8HostPort() uint16 // supported K8 host ports +} + +type Context interface { + CreateExternalContainer(container ExternalContainer) (ExternalContainer, error) + DeleteExternalContainer(container ExternalContainer) error + + CreateNetwork(name string, subnets ...string) (Network, error) + DeleteNetwork(network Network) error + AttachNetwork(network Network, instance string) (NetworkInterface, error) + DetachNetwork(network Network, instance string) error + GetAttachedNetworks() (Networks, error) + + AddCleanUpFn(func() error) +} + +type Network interface { + Name() string + IPv4IPv6Subnets() (string, string, error) + Equal(candidate Network) bool + String() string +} + +type Networks struct { + List []Network +} + +func (n *Networks) Contains(network Network) bool { + _, found := n.Get(network.Name()) + return found +} + +func (n *Networks) Get(name string) (Network, bool) { + for _, network := range n.List { + if network.Name() == name { + return network, true + } + } + return nil, false +} + +func (n *Networks) InsertNoDupe(candidate Network) { + var found bool + for _, network := range n.List { + if network.Equal(candidate) { + found = true + break + } + } + if !found { + n.List = append(n.List, candidate) + } +} + +type Attachment struct { + Network Network + Instance string +} + +func (a Attachment) equal(candidate Attachment) bool { + if a.Instance != candidate.Instance { + return false + } + if !a.Network.Equal(candidate.Network) { + return false + } + return true +} + +type Attachments struct { + List []Attachment +} + +func (as *Attachments) InsertNoDupe(candidate Attachment) { + var found bool + for _, existingNetworkAttachment := range as.List { + if existingNetworkAttachment.equal(candidate) { + found = true + break + } + } + if !found { + as.List = append(as.List, candidate) + } +} + +type NetworkInterface struct { + IPv4Gateway string + IPv4 string + IPv4Prefix string + IPv6Gateway string + IPv6 string + IPv6Prefix string + MAC string + InfName string +} + +func (n NetworkInterface) GetName() string { + return n.InfName +} + +func (n NetworkInterface) GetIPv4Gateway() string { + return n.IPv4Gateway +} + +func (n NetworkInterface) GetIPv4() string { + return n.IPv4 +} + +func (n NetworkInterface) GetIPv4Prefix() string { + return n.IPv4Prefix +} + +func (n NetworkInterface) GetIPv6Gateway() string { + return n.IPv4Gateway +} + +func (n NetworkInterface) GetIPv6() string { + return n.IPv6 +} + +func (n NetworkInterface) GetIPv6Prefix() string { + return n.IPv6Prefix +} + +func (n NetworkInterface) GetMAC() string { + return n.MAC +} + +type ExternalContainer struct { + Name string + Image string + Network Network + Args []string + ExtPort uint16 + IPv4 string + IPv6 string +} + +func (ec ExternalContainer) GetName() string { + return ec.Name +} + +func (ec ExternalContainer) GetIPv4() string { + return ec.IPv4 +} + +func (ec ExternalContainer) GetIPv6() string { + return ec.IPv6 +} + +func (ec ExternalContainer) GetPortStr() string { + if ec.ExtPort == 0 { + panic("port isn't defined") + } + return fmt.Sprintf("%d", ec.ExtPort) +} + +func (ec ExternalContainer) GetPort() uint16 { + if ec.ExtPort == 0 { + panic("port isn't defined") + } + return ec.ExtPort +} + +func (ec ExternalContainer) IsIPv4() bool { + return ec.IPv4 != "" +} + +func (ec ExternalContainer) IsIPv6() bool { + return ec.IPv6 != "" +} + +func (ec ExternalContainer) String() string { + str := fmt.Sprintf("Name: %q, Image: %q, Network: %q, Command: %q", ec.Name, ec.Image, ec.Network, strings.Join(ec.Args, " 
")) + if ec.IsIPv4() { + str = fmt.Sprintf("%s, IPv4 address: %q", str, ec.GetIPv4()) + } + if ec.IsIPv6() { + str = fmt.Sprintf("%s, IPv6 address: %s", str, ec.GetIPv6()) + } + return str +} + +func (ec ExternalContainer) IsValidPreCreateContainer() (bool, error) { + var errs []error + if ec.Name == "" { + errs = append(errs, errors.New("name is not set")) + } + if ec.Image == "" { + errs = append(errs, errors.New("image is not set")) + } + if ec.Network.String() == "" { + errs = append(errs, errors.New("network is not set")) + } + if ec.ExtPort == 0 { + errs = append(errs, errors.New("port is not set")) + } + if len(errs) == 0 { + return true, nil + } + return false, condenseErrors(errs) +} + +func (ec ExternalContainer) IsValidPostCreate() (bool, error) { + var errs []error + if ec.IPv4 == "" && ec.IPv6 == "" { + errs = append(errs, errors.New("provider did not populate an IPv4 or an IPv6 address")) + } + if len(errs) == 0 { + return true, nil + } + return false, condenseErrors(errs) +} + +func (ec ExternalContainer) IsValidPreDelete() (bool, error) { + if ec.IPv4 == "" && ec.IPv6 == "" { + return false, fmt.Errorf("IPv4 or IPv6 must be set") + } + return true, nil +} + +var NotFound = fmt.Errorf("not found") + +func condenseErrors(errs []error) error { + switch len(errs) { + case 0: + return nil + case 1: + return errs[0] + } + err := errs[0] + for _, e := range errs[1:] { + err = errors.Join(err, e) + } + return err +} diff --git a/test/e2e/infraprovider/portalloc/port_allocator.go b/test/e2e/infraprovider/portalloc/port_allocator.go new file mode 100644 index 0000000000..4b93520c01 --- /dev/null +++ b/test/e2e/infraprovider/portalloc/port_allocator.go @@ -0,0 +1,28 @@ +package portalloc + +import "sync" + +type PortAllocator struct { + start uint16 + end uint16 + next uint16 + mu sync.Mutex +} + +func New(start, end uint16) *PortAllocator { + return &PortAllocator{start: start, end: end, next: start, mu: sync.Mutex{}} +} + +func (pr *PortAllocator) Allocate() uint16 { + pr.mu.Lock() + defer pr.mu.Unlock() + val := pr.next + if val == 0 { + panic("port allocation is zero and may have been exhausted") + } + pr.next += 1 + if pr.next > pr.end { + panic("port allocation limit reached") + } + return val +} diff --git a/test/e2e/infraprovider/provider.go b/test/e2e/infraprovider/provider.go new file mode 100644 index 0000000000..5f53b35cd5 --- /dev/null +++ b/test/e2e/infraprovider/provider.go @@ -0,0 +1,37 @@ +package infraprovider + +import ( + "fmt" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/providers/kind" + + "k8s.io/client-go/rest" +) + +type Name string + +func (n Name) String() string { + return string(n) +} + +var provider api.Provider + +// Set detects which infrastructure provider. Arg config is not needed for KinD provider but downstream implementations +// will require access to the kapi to infer what platform k8 is running on. 
+func Set(_ *rest.Config) error { + // detect if the provider is KinD + if kind.IsProvider() { + provider = kind.New() + } + if provider == nil { + return fmt.Errorf("failed to determine the infrastructure provider") + } + return nil +} + +func Get() api.Provider { + if provider == nil { + panic("provider not set") + } + return provider +} diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go new file mode 100644 index 0000000000..9e1fe63e47 --- /dev/null +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -0,0 +1,645 @@ +package kind + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "os/exec" + "strings" + "sync" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/ovn-org/ovn-kubernetes/test/e2e/containerengine" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/portalloc" + + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + utilnet "k8s.io/utils/net" +) + +// IsProvider returns true if clusters provider is KinD +func IsProvider() bool { + _, err := exec.LookPath("kubectl") + if err != nil { + panic("kubectl must be installed") + } + currentCtx, err := exec.Command("kubectl", "config", "current-context").CombinedOutput() + if err != nil { + panic(fmt.Sprintf("unable to get current cluster context: %v", err)) + } + if strings.Contains(string(currentCtx), "kind-ovn") { + return true + } + return false +} + +type kind struct { + externalContainerPort *portalloc.PortAllocator + hostPort *portalloc.PortAllocator +} + +func New() api.Provider { + return &kind{externalContainerPort: portalloc.New(12000, 65535), hostPort: portalloc.New(1024, 65535)} +} + +func (k *kind) Name() string { + return "kind" +} + +func (k *kind) PrimaryNetwork() (api.Network, error) { + return getNetwork("kind") +} + +func (k *kind) ExternalContainerPrimaryInterfaceName() string { + return "eth0" +} + +func (k *kind) GetNetwork(name string) (api.Network, error) { + return getNetwork(name) +} + +func (k *kind) GetExternalContainerNetworkInterface(container api.ExternalContainer, network api.Network) (api.NetworkInterface, error) { + return getNetworkInterface(container.Name, network.Name()) +} + +func (k *kind) GetK8NodeNetworkInterface(container string, network api.Network) (api.NetworkInterface, error) { + return getNetworkInterface(container, network.Name()) +} + +func (k *kind) ExecK8NodeCommand(nodeName string, cmd []string) (string, error) { + if !doesContainerNameExist(nodeName) { + return "", fmt.Errorf("cannot exec into container %q because it doesn't exist: %w", nodeName, api.NotFound) + } + if len(cmd) == 0 { + panic("ExecK8NodeCommand(): insufficient command arguments") + } + cmdArgs := append([]string{"exec", nodeName}, cmd...) + stdOut, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, stdOut) + } + return string(stdOut), nil +} + +func (k *kind) ExecExternalContainerCommand(container api.ExternalContainer, cmd []string) (string, error) { + if !doesContainerNameExist(container.Name) { + return "", fmt.Errorf("cannot exec into container %q because it doesn't exist: %w", container.Name, api.NotFound) + } + cmdArgs := append([]string{"exec", container.Name}, cmd...) 
+ out, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to exec container command (%s): err: %v, stdout: %q", strings.Join(cmdArgs, " "), err, out) + } + return string(out), nil +} + +func (k *kind) GetExternalContainerLogs(container api.ExternalContainer) (string, error) { + if !doesContainerNameExist(container.Name) { + return "", fmt.Errorf("container %q doesn't exist, therefore no logs can be retrieved: %w", container.Name, api.NotFound) + } + stdOut, err := exec.Command(containerengine.Get().String(), "logs", container.Name).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to get logs of external container (%s): %v (%s)", container, err, stdOut) + } + return string(stdOut), nil +} + +func (k *kind) GetExternalContainerPort() uint16 { + return k.externalContainerPort.Allocate() +} + +func (k *kind) GetK8HostPort() uint16 { + return k.hostPort.Allocate() +} + +func (k *kind) NewTestContext() api.Context { + ck := &contextKind{Mutex: sync.Mutex{}} + ginkgo.DeferCleanup(ck.CleanUp) + return ck +} + +type contextKind struct { + sync.Mutex + cleanUpNetworkAttachments api.Attachments + cleanUpNetworks api.Networks + cleanUpContainers []api.ExternalContainer + cleanUpFns []func() error +} + +func (c *contextKind) CreateExternalContainer(container api.ExternalContainer) (api.ExternalContainer, error) { + c.Lock() + defer c.Unlock() + return c.createExternalContainer(container) +} + +func (c *contextKind) createExternalContainer(container api.ExternalContainer) (api.ExternalContainer, error) { + if valid, err := container.IsValidPreCreateContainer(); !valid { + return container, err + } + if doesContainerNameExist(container.Name) { + return container, fmt.Errorf("container %s already exists", container.Name) + } + cmd := []string{"run", "-itd", "--privileged", "--name", container.Name, "--network", container.Network.Name(), "--hostname", container.Name} + cmd = append(cmd, container.Image) + if len(container.Args) > 0 { + cmd = append(cmd, container.Args...) + } else { + if images.AgnHost() == container.Image { + cmd = append(cmd, "pause") + } + } + fmt.Printf("creating container with command: %q\n", strings.Join(cmd, " ")) + stdOut, err := exec.Command(containerengine.Get().String(), cmd...).CombinedOutput() + if err != nil { + return container, fmt.Errorf("failed to create container %s: %s (%s)", container, err, stdOut) + } + // fetch IPs for the attached container network. Host networked containers do not expose IP information. 
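NewTestContext registers CleanUp through ginkgo.DeferCleanup, so a spec only declares what it needs and everything it created is torn down when the spec ends. A condensed sketch of that lifecycle, written as a ginkgo spec-body fragment and assuming the import aliases used in kubevirt.go (infraprovider, infraapi, images); the container name and command are illustrative:

providerCtx := infraprovider.Get().NewTestContext() // CleanUp is deferred automatically

primaryNet, err := infraprovider.Get().PrimaryNetwork()
framework.ExpectNoError(err)

server, err := providerCtx.CreateExternalContainer(infraapi.ExternalContainer{
	Name:    "demo-agnhost",   // illustrative
	Image:   images.AgnHost(), // with no Args the provider appends "pause" for this image
	Network: primaryNet,
	ExtPort: infraprovider.Get().GetExternalContainerPort(),
})
framework.ExpectNoError(err)

// Commands run through the container engine (docker or podman) on the host.
out, err := infraprovider.Get().ExecExternalContainerCommand(server, []string{"ip", "-br", "addr"})
framework.ExpectNoError(err)
framework.Logf("external container addresses:\n%s", out)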
+ if !isHostNetworked(container.Network.Name()) { + err = wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 360*time.Second, true, func(ctx context.Context) (done bool, err error) { + ni, err := getNetworkInterface(container.Name, container.Network.Name()) + if err != nil { + framework.Logf("attempt to get container %s network interface attached to network %s failed: %v, retrying...", container.Name, container.Network.Name(), err) + return false, nil + } + if ni.GetIPv4() == "" && ni.GetIPv6() == "" { + return false, nil + } + container.IPv4 = ni.GetIPv4() + container.IPv6 = ni.GetIPv6() + return true, nil + }) + if err != nil { + return container, fmt.Errorf("failed to get network interface information: %w", err) + } + } + + if valid, err := container.IsValidPostCreate(); !valid { + return container, err + } + c.cleanUpContainers = append(c.cleanUpContainers, container) + return container, nil +} + +func (c *contextKind) DeleteExternalContainer(container api.ExternalContainer) error { + c.Lock() + defer c.Unlock() + return c.deleteExternalContainer(container) +} + +func (c *contextKind) deleteExternalContainer(container api.ExternalContainer) error { + // check if it is present before deleting + if !doesContainerNameExist(container.Name) { + return nil + } + stdOut, err := exec.Command(containerengine.Get().String(), "rm", "-f", container.Name).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete external container (%s): %v (%s)", container, err, stdOut) + } + err = wait.ExponentialBackoff(wait.Backoff{Duration: 1 * time.Second, Factor: 5, Steps: 5}, wait.ConditionFunc(func() (done bool, err error) { + stdOut, err = exec.Command(containerengine.Get().String(), "ps", "-f", fmt.Sprintf("Name=^%s$", container.Name), "-q").CombinedOutput() + if err != nil { + return false, fmt.Errorf("failed to check if external container (%s) is deleted: %v (%s)", container, err, stdOut) + } + if string(stdOut) != "" { + return false, nil + } + return true, nil + })) + if err != nil { + return fmt.Errorf("failed to delete external container (%s): %v", container, err) + } + return nil +} + +func (c *contextKind) CreateNetwork(name string, subnets ...string) (api.Network, error) { + c.Lock() + defer c.Unlock() + return c.createNetwork(name, subnets...) 
+} + +func (c *contextKind) createNetwork(name string, subnets ...string) (api.Network, error) { + network := containerEngineNetwork{name, nil} + if doesNetworkExist(name) { + attachedContainers, err := getContainerAttachedToNetwork(name) + if err != nil { + framework.Logf("failed to get containers attached to network %s: %v", name, err) + } + if len(attachedContainers) > 0 { + return network, fmt.Errorf("network %s already exists with containers attached: '%v'", name, attachedContainers) + } + return network, fmt.Errorf("network %q already exists", name) + } + cmdArgs := []string{"network", "create", "--internal", "--driver", "bridge", name} + var v6 bool + // detect if IPv6 flag is required + for _, subnet := range subnets { + cmdArgs = append(cmdArgs, "--subnet", subnet) + if utilnet.IsIPv6CIDRString(subnet) { + v6 = true + } + } + if v6 { + cmdArgs = append(cmdArgs, "--ipv6") + } + stdOut, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() + if err != nil { + return network, fmt.Errorf("failed to create Network with command %q: %s (%s)", strings.Join(cmdArgs, " "), err, stdOut) + } + c.cleanUpNetworks.InsertNoDupe(network) + return getNetwork(name) +} + +func (c *contextKind) AttachNetwork(network api.Network, container string) (api.NetworkInterface, error) { + c.Lock() + defer c.Unlock() + return c.attachNetwork(network, container) +} + +func (c *contextKind) attachNetwork(network api.Network, container string) (api.NetworkInterface, error) { + if !doesNetworkExist(network.Name()) { + return api.NetworkInterface{}, fmt.Errorf("network %s doesn't exist", network.Name()) + } + if isNetworkAttachedToContainer(network.Name(), container) { + return api.NetworkInterface{}, fmt.Errorf("network %s is already attached to container %s", network.Name(), container) + } + // return if the network is connected to the container + stdOut, err := exec.Command(containerengine.Get().String(), "network", "connect", network.Name(), container).CombinedOutput() + if err != nil { + return api.NetworkInterface{}, fmt.Errorf("failed to attach network to container %s: %s (%s)", container, err, stdOut) + } + c.cleanUpNetworkAttachments.InsertNoDupe(api.Attachment{Network: network, Instance: container}) + return getNetworkInterface(container, network.Name()) +} + +func (c *contextKind) DetachNetwork(network api.Network, container string) error { + c.Lock() + defer c.Unlock() + return c.detachNetwork(network, container) +} + +func (c *contextKind) detachNetwork(network api.Network, container string) error { + if !doesNetworkExist(network.Name()) { + return nil + } + if !isNetworkAttachedToContainer(network.Name(), container) { + return nil + } + stdOut, err := exec.Command(containerengine.Get().String(), "network", "disconnect", network.Name(), container).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to detach network %s from node %s: %s (%s)", network, container, err, stdOut) + } + return nil +} + +func (c *contextKind) DeleteNetwork(network api.Network) error { + c.Lock() + defer c.Unlock() + return c.deleteNetwork(network) +} + +func (c *contextKind) deleteNetwork(network api.Network) error { + return wait.PollImmediate(1*time.Second, 10*time.Second, func() (done bool, err error) { + if !doesNetworkExist(network.Name()) { + return true, nil + } + // ensure all containers are disconnected from the network and if any are found, disconnect it. 
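Secondary networks follow the same create-through-the-context pattern, so they are recorded for cleanup alongside containers and attachments. A sketch with made-up network name and subnets (the node name matches the default KinD cluster):

net2, err := providerCtx.CreateNetwork("demo-net", "172.30.0.0/16", "fd00:10:30::/64") // --ipv6 is added automatically when an IPv6 subnet is given
framework.ExpectNoError(err)

iface, err := providerCtx.AttachNetwork(net2, "ovn-worker") // KinD nodes are just containers
framework.ExpectNoError(err)
framework.Logf("ovn-worker got %s/%s on %s", iface.IPv4, iface.IPv4Prefix, iface.InfName)

// Detach and delete are also run by CleanUp; calling them explicitly is only
// needed when a spec wants to exercise the removal path itself.
framework.ExpectNoError(providerCtx.DetachNetwork(net2, "ovn-worker"))
framework.ExpectNoError(providerCtx.DeleteNetwork(net2))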
+ delimiter := " " + stdOutBytes, err := exec.Command(containerengine.Get().String(), + "network", "inspect", "-f", fmt.Sprintf("'{{range .Containers}}{{.Name}}%s{{end}}'", delimiter), network.Name()).CombinedOutput() + if err != nil { + framework.Logf("failed to list attached containers for network %s:, err: %s, stdout: (%s)", network, err, stdOutBytes) + return false, nil + } + allContainers := strings.TrimSuffix(string(stdOutBytes), "\n") + if allContainers != "" { + for _, containerName := range strings.Split(allContainers, delimiter) { + containerName = strings.TrimLeft(containerName, "'") + containerName = strings.TrimRight(containerName, "'") + if containerName == "" { + continue + } + framework.Logf("deleting network encountered a stale container %q and it must be removed before removing the network", containerName) + framework.Logf("Warning: Fix tests for container %s.. deleting container", containerName) + if err = c.detachNetwork(network, containerName); err != nil { + framework.Logf("while trying to delete network %q, attempted to detach container %q that is "+ + "still attached to network", network.Name(), containerName) + return false, nil + } + } + } + stdOut, err := exec.Command(containerengine.Get().String(), "network", "rm", network.Name()).CombinedOutput() + if err != nil { + framework.Logf("failed to delete network %s: %s (%s)", network.Name(), err, stdOut) + return false, nil + } + return true, nil + }) +} + +func (c *contextKind) GetAttachedNetworks() (api.Networks, error) { + c.Lock() + defer c.Unlock() + return c.getAttachedNetworks() +} + +func (c *contextKind) getAttachedNetworks() (api.Networks, error) { + primaryNetwork, err := getNetwork("kind") + if err != nil { + return api.Networks{}, fmt.Errorf("failed to get primary network: %v", err) + } + attachedNetworks := api.Networks{List: []api.Network{primaryNetwork}} + for _, attachment := range c.cleanUpNetworkAttachments.List { + attachedNetworks.InsertNoDupe(attachment.Network) + } + return attachedNetworks, nil +} + +func (c *contextKind) AddCleanUpFn(cleanUpFn func() error) { + c.Lock() + defer c.Unlock() + c.addCleanUpFn(cleanUpFn) +} + +func (c *contextKind) addCleanUpFn(cleanUpFn func() error) { + c.cleanUpFns = append(c.cleanUpFns, cleanUpFn) +} + +func (c *contextKind) CleanUp() error { + c.Lock() + defer c.Unlock() + err := c.cleanUp() + if err != nil { + framework.Logf("Cleanup failed: %v", err) + } + return err +} + +// CleanUp must be syncronised by caller +func (c *contextKind) cleanUp() error { + var errs []error + // generic cleanup activities + for i := len(c.cleanUpFns) - 1; i >= 0; i-- { + if err := c.cleanUpFns[i](); err != nil { + errs = append(errs, err) + } + } + c.cleanUpFns = nil + // detach network(s) from nodes + for _, na := range c.cleanUpNetworkAttachments.List { + if err := c.detachNetwork(na.Network, na.Instance); err != nil && !errors.Is(err, api.NotFound) { + errs = append(errs, err) + } + } + // remove containers + for _, container := range c.cleanUpContainers { + if err := c.deleteExternalContainer(container); err != nil && !errors.Is(err, api.NotFound) { + errs = append(errs, err) + } + } + c.cleanUpContainers = nil + // delete secondary networks + for _, network := range c.cleanUpNetworks.List { + if err := c.deleteNetwork(network); err != nil && !errors.Is(err, api.NotFound) { + errs = append(errs, err) + } + } + c.cleanUpNetworks.List = nil + return condenseErrors(errs) +} + +const ( + nameFormat = "{{.Name}}" + inspectNetworkIPAMJSON = "{{json .IPAM.Config }}" + 
inspectNetworkIPv4GWKeyStr = "{{ .NetworkSettings.Networks.%s.Gateway }}" + inspectNetworkIPv4AddrKeyStr = "{{ .NetworkSettings.Networks.%s.IPAddress }}" + inspectNetworkIPv4PrefixKeyStr = "{{ .NetworkSettings.Networks.%s.IPPrefixLen }}" + inspectNetworkIPv6GWKeyStr = "{{ .NetworkSettings.Networks.%s.IPv6Gateway }}" + inspectNetworkIPv6AddrKeyStr = "{{ .NetworkSettings.Networks.%s.GlobalIPv6Address }}" + inspectNetworkIPv6PrefixKeyStr = "{{ .NetworkSettings.Networks.%s.GlobalIPv6PrefixLen }}" + inspectNetworkMACKeyStr = "{{ .NetworkSettings.Networks.%s.MacAddress }}" + inspectNetworkContainersKeyStr = "{{ range $key, $value := .Containers }}{{ printf \"%s\\n\" $value.Name}}{{ end }}'" + emptyValue = "" +) + +func isNetworkAttachedToContainer(networkName, containerName string) bool { + // error is returned if failed to find network attached to instance or no IPv4/IPv6 Ips. + _, err := getNetworkInterface(containerName, networkName) + if err != nil { + return false + } + return true +} + +func doesContainerNameExist(name string) bool { + // check if it is present before retrieving logs + stdOut, err := exec.Command(containerengine.Get().String(), "ps", "-f", fmt.Sprintf("Name=^%s$", name), "-q").CombinedOutput() + if err != nil { + panic(fmt.Sprintf("failed to check if external container (%s) exists: %v (%s)", name, err, stdOut)) + } + if string(stdOut) == "" { + return false + } + return true +} + +func doesNetworkExist(networkName string) bool { + dataBytes, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + if err != nil { + panic(err.Error()) + } + for _, existingNetworkName := range strings.Split(strings.Trim(string(dataBytes), "\n"), "\n") { + if existingNetworkName == networkName { + return true + } + } + return false +} + +func getNetwork(networkName string) (containerEngineNetwork, error) { + n := containerEngineNetwork{name: networkName} + if !doesNetworkExist(networkName) { + return n, api.NotFound + } + configs := make([]containerEngineNetworkConfig, 0, 1) + dataBytes, err := exec.Command(containerengine.Get().String(), "network", "inspect", "-f", inspectNetworkIPAMJSON, networkName).CombinedOutput() + if err != nil { + return n, fmt.Errorf("failed to extract network %q data: %v", networkName, err) + } + dataBytes = []byte(strings.Trim(string(dataBytes), "\n")) + if err = json.Unmarshal(dataBytes, &configs); err != nil { + return n, fmt.Errorf("failed to unmarshall network %q configuration using network inspect -f %q: %v", networkName, inspectNetworkIPAMJSON, err) + } + if len(configs) == 0 { + return n, fmt.Errorf("failed to find any IPAM configuration for network %s", networkName) + } + // validate configs + for _, config := range configs { + if config.Subnet == "" { + return n, fmt.Errorf("network %s contains invalid subnet config", networkName) + } + } + n.Configs = configs + return n, nil +} + +func getContainerAttachedToNetwork(networkName string) ([]string, error) { + dataBytes, err := exec.Command(containerengine.Get().String(), "network", "inspect", "-f", inspectNetworkContainersKeyStr, networkName).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to fetch containers attached to network %q, err: %v", networkName, err) + } + var containers []string + + for _, container := range strings.Split(string(dataBytes), "\n") { + container = strings.Trim(container, "'") + if container != "" { + containers = append(containers, container) + } + } + return containers, nil +} + +func 
getNetworkInterface(containerName, networkName string) (api.NetworkInterface, error) { + var ni = api.NetworkInterface{} + if !doesNetworkExist(networkName) { + return ni, fmt.Errorf("failed to find network %q: %w", networkName, api.NotFound) + } + if !doesContainerNameExist(containerName) { + return ni, fmt.Errorf("failed to find container %q: %w", containerName, api.NotFound) + } + getContainerNetwork := func(inspectTemplate string) (string, error) { + value, err := exec.Command(containerengine.Get().String(), "inspect", "-f", + fmt.Sprintf("'"+inspectTemplate+"'", networkName), containerName).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to extract %s network data for container %s using inspect template %s: %v", + networkName, containerName, inspectTemplate, err) + } + valueStr := strings.Trim(string(value), "\n") + valueStr = strings.Trim(valueStr, "'") + if valueStr == emptyValue { + return "", nil + } + return valueStr, nil + } + + getIPFamilyFlagForIPRoute2 := func(ipStr string) string { + ip := net.ParseIP(ipStr) + if ip == nil { + panic("invalid IP") + } + if utilnet.IsIPv6(ip) { + return "-6" + } + return "-4" + } + + getInterfaceNameUsingIP := func(ip string) (string, error) { + allInfAddrBytes, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "-br", getIPFamilyFlagForIPRoute2(ip), "a", "sh").CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to find interface with IP %s on container %s with command 'ip -br a sh': err %v, out: %s", ip, containerName, + err, allInfAddrBytes) + } + var ipLine string + for _, line := range strings.Split(string(allInfAddrBytes), "\n") { + if strings.Contains(line, ip) { + ipLine = line + break + } + } + if ipLine == "" { + return "", fmt.Errorf("failed to find IP %q within 'ip a' command on container %q:\n\n%q", ip, containerName, string(allInfAddrBytes)) + } + ipLineSplit := strings.Split(ipLine, " ") + if len(ipLine) == 0 { + return "", fmt.Errorf("failed to find interface name from 'ip a' output line %q", ipLine) + } + infNames := ipLineSplit[0] + splitChar := " " + if strings.Contains(infNames, "@") { + splitChar = "@" + } + infNamesSplit := strings.Split(infNames, splitChar) + if len(infNamesSplit) == 0 { + return "", fmt.Errorf("failed to extract inf name + veth name from %q splitting by %q", infNames, splitChar) + } + infName := infNamesSplit[0] + // validate its an interface name on the Node with iproute2 + out, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "link", "show", infName).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to validate that interface name %q with IP %s exists in container %s: err %v, out: %s", + infName, ip, containerName, err, out) + } + return infName, nil // second value is veth in 'host' netns + } + + var err error + ni.IPv4Gateway, err = getContainerNetwork(inspectNetworkIPv4GWKeyStr) + if err != nil { + // may not be available + framework.Logf("failed to get network gateway IPv4 %s: %v", err) + } + ni.IPv4, err = getContainerNetwork(inspectNetworkIPv4AddrKeyStr) + if err != nil { + return ni, err + } + if ni.IPv4 != "" { + ni.InfName, err = getInterfaceNameUsingIP(ni.IPv4) + if err != nil { + framework.Logf("failed to get network interface name using IPv4 address %s: %v", ni.IPv4, err) + } + } + ni.IPv6Gateway, err = getContainerNetwork(inspectNetworkIPv6GWKeyStr) + if err != nil { + framework.Logf("failed to get network gateway IPv6 %s: %v", err) + } + ni.IPv4Prefix, err = 
getContainerNetwork(inspectNetworkIPv4PrefixKeyStr) + if err != nil { + return ni, err + } + ni.IPv6, err = getContainerNetwork(inspectNetworkIPv6AddrKeyStr) + if err != nil { + return ni, err + } + if ni.IPv6 != "" { + ni.InfName, err = getInterfaceNameUsingIP(ni.IPv6) + if err != nil { + framework.Logf("failed to get network interface name using IPv4 address %s: %v", ni.IPv6, err) + } + } + ni.IPv6Prefix, err = getContainerNetwork(inspectNetworkIPv6PrefixKeyStr) + if err != nil { + return ni, err + } + ni.MAC, err = getContainerNetwork(inspectNetworkMACKeyStr) + if err != nil { + return ni, err + } + // fail if no IPs were found + if ni.IPv4 == "" && ni.IPv6 == "" { + return ni, fmt.Errorf("failed to get an IPv4 and/or IPv6 address for interface attached to container %q"+ + " and attached to network %q", containerName, networkName) + } + return ni, nil +} + +func isHostNetworked(networkName string) bool { + return networkName == "host" +} + +func condenseErrors(errs []error) error { + switch len(errs) { + case 0: + return nil + case 1: + return errs[0] + } + err := errs[0] + for _, e := range errs[1:] { + err = errors.Join(err, e) + } + return err +} diff --git a/test/e2e/infraprovider/providers/kind/network.go b/test/e2e/infraprovider/providers/kind/network.go new file mode 100644 index 0000000000..22718a8379 --- /dev/null +++ b/test/e2e/infraprovider/providers/kind/network.go @@ -0,0 +1,57 @@ +package kind + +import ( + "fmt" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "k8s.io/utils/net" +) + +type containerEngineNetwork struct { + name string + Configs []containerEngineNetworkConfig +} + +type containerEngineNetworkConfig struct { + Subnet string `json:"Subnet"` + Gateway string `json:"Gateway"` +} + +func (n containerEngineNetwork) Name() string { + return n.name +} + +func (n containerEngineNetwork) IPv4IPv6Subnets() (string, string, error) { + if len(n.Configs) == 0 { + panic("failed to get IPV4/V6 because network doesnt contain configuration") + } + var v4, v6 string + for _, config := range n.Configs { + if config.Subnet == "" { + panic(fmt.Sprintf("failed to get IPV4/V6 because network %s contains a config with an empty subnet", n.Name)) + } + ip, _, err := net.ParseCIDRSloppy(config.Subnet) + if err != nil { + panic(fmt.Sprintf("failed to parse network %s subnet %q: %v", n.Name, config.Subnet, err)) + } + if net.IsIPv4(ip) { + v4 = config.Subnet + } else { + v6 = config.Subnet + } + } + if v4 == "" && v6 == "" { + return "", "", fmt.Errorf("failed to find IPv4 and IPv6 addresses for network %s", n.Name) + } + return v4, v6, nil +} + +func (n containerEngineNetwork) Equal(candidate api.Network) bool { + if n.name != candidate.Name() { + return false + } + return true +} + +func (n containerEngineNetwork) String() string { + return n.name +} diff --git a/test/e2e/ipalloc/ipalloc.go b/test/e2e/ipalloc/ipalloc.go new file mode 100644 index 0000000000..7decbaa0a1 --- /dev/null +++ b/test/e2e/ipalloc/ipalloc.go @@ -0,0 +1,47 @@ +package ipalloc + +import ( + "fmt" + "math/big" + "net" +) + +type ipAllocator struct { + net *net.IPNet + // base is a cached version of the start IP in the CIDR range as a *big.Int + base *big.Int + // max is the maximum size of the usable addresses in the range + max int + count int +} + +func newIPAllocator(cidr *net.IPNet) *ipAllocator { + return &ipAllocator{net: cidr, base: getBaseInt(cidr.IP), max: limit(cidr)} +} + +func (n *ipAllocator) AllocateNextIP() (net.IP, error) { + if n.count >= n.max { + return net.IP{}, 
fmt.Errorf("limit of %d reached", n.max) + } + n.base.Add(n.base, big.NewInt(1)) + n.count += 1 + b := n.base.Bytes() + b = append(make([]byte, 16), b...) + return b[len(b)-16:], nil +} + +func getBaseInt(ip net.IP) *big.Int { + return big.NewInt(0).SetBytes(ip.To16()) +} + +func limit(subnet *net.IPNet) int { + ones, bits := subnet.Mask.Size() + if bits == 32 && (bits-ones) >= 31 || bits == 128 && (bits-ones) >= 127 { + return 0 + } + // limit to 2^8 (256) IPs for e2es + if bits == 128 && (bits-ones) >= 8 { + return int(1) << uint(8) + } + return int(1) << uint(bits-ones) +} diff --git a/test/e2e/ipalloc/primaryipalloc.go b/test/e2e/ipalloc/primaryipalloc.go new file mode 100644 index 0000000000..79a0ae5010 --- /dev/null +++ b/test/e2e/ipalloc/primaryipalloc.go @@ -0,0 +1,220 @@ +package ipalloc + +import ( + "context" + "fmt" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/client-go/kubernetes/typed/core/v1" + "net" + "sync" +) + +// primaryIPAllocator attempts to allocate an IP in the same subnet as a nodes primary network +type primaryIPAllocator struct { + mu *sync.Mutex + v4 *ipAllocator + v6 *ipAllocator + nodeClient v1.NodeInterface +} + +var pia *primaryIPAllocator + +// InitPrimaryIPAllocator must be called to init IP allocator(s). Callers must be synchronise. +func InitPrimaryIPAllocator(nodeClient v1.NodeInterface) error { + var err error + pia, err = newPrimaryIPAllocator(nodeClient) + return err +} + +func NewPrimaryIPv4() (net.IP, error) { + return pia.AllocateNextV4() +} + +func NewPrimaryIPv6() (net.IP, error) { + return pia.AllocateNextV6() +} + +// newPrimaryIPAllocator gets a Nodes primary interfaces network info, increments the 2 octet and checks if the IP is still +// within the subnet of all the K8 nodes. +func newPrimaryIPAllocator(nodeClient v1.NodeInterface) (*primaryIPAllocator, error) { + ipa := &primaryIPAllocator{mu: &sync.Mutex{}, nodeClient: nodeClient} + nodes, err := nodeClient.List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return ipa, fmt.Errorf("failed to get a list of node(s): %v", err) + } + if len(nodes.Items) == 0 { + return ipa, fmt.Errorf("expected at least one node but found zero") + } + // FIXME: the approach taken here to find the first node IP+mask and then to increment the second last octet wont work in + // all scenarios (node with /24). We should generate an EgressIP compatible with a Node providers primary network and then take care its unique globally. + + // The approach here is to grab initial starting IP from first node found, increment the second last octet. + // Approach taken here won't work for Nodes handed /24 subnets. 
+ nodePrimaryIPs, err := util.ParseNodePrimaryIfAddr(&nodes.Items[0]) + if err != nil { + return ipa, fmt.Errorf("failed to parse node primary interface address from Node object: %v", err) + } + if nodePrimaryIPs.V4.IP != nil { + // should be ok with /16 and /64 node primary provider subnets + // TODO; fixme; what about /24 subnet Nodes like GCP + nodePrimaryIPs.V4.IP[len(nodePrimaryIPs.V4.IP)-2]++ + ipa.v4 = newIPAllocator(&net.IPNet{IP: nodePrimaryIPs.V4.IP, Mask: nodePrimaryIPs.V4.Net.Mask}) + } + if nodePrimaryIPs.V6.IP != nil { + nodePrimaryIPs.V6.IP[len(nodePrimaryIPs.V6.IP)-2]++ + ipa.v6 = newIPAllocator(&net.IPNet{IP: nodePrimaryIPs.V6.IP, Mask: nodePrimaryIPs.V6.Net.Mask}) + } + // verify the new starting base IP is within all Nodes subnets + if nodePrimaryIPs.V4.IP != nil { + ipNets, err := getNodePrimaryProviderIPs(nodes.Items, false) + if err != nil { + return ipa, err + } + nextIP, err := ipa.v4.AllocateNextIP() + if err != nil { + return ipa, err + } + if !isIPWithinAllSubnets(ipNets, nextIP) { + return ipa, fmt.Errorf("IP %s is not within all Node subnets", nextIP) + } + } + if nodePrimaryIPs.V6.IP != nil { + ipNets, err := getNodePrimaryProviderIPs(nodes.Items, true) + if err != nil { + return ipa, err + } + nextIP, err := ipa.v6.AllocateNextIP() + if err != nil { + return ipa, err + } + if !isIPWithinAllSubnets(ipNets, nextIP) { + return ipa, fmt.Errorf("IP %s is not within all Node subnets", nextIP) + } + } + + return ipa, nil +} + +func getNodePrimaryProviderIPs(nodes []corev1.Node, isIPv6 bool) ([]*net.IPNet, error) { + ipNets := make([]*net.IPNet, 0, len(nodes)) + for _, node := range nodes { + nodePrimaryIPs, err := util.ParseNodePrimaryIfAddr(&node) + if err != nil { + return nil, fmt.Errorf("failed to parse node primary interface address from Node %s object: %v", node.Name, err) + } + var mask net.IPMask + var ip net.IP + + if isIPv6 { + ip = nodePrimaryIPs.V6.IP + mask = nodePrimaryIPs.V6.Net.Mask + } else { + ip = nodePrimaryIPs.V4.IP + mask = nodePrimaryIPs.V4.Net.Mask + } + if len(ip) == 0 || len(mask) == 0 { + return nil, fmt.Errorf("failed to find Node %s primary Node IP and/or mask", node.Name) + } + ipNets = append(ipNets, &net.IPNet{IP: ip, Mask: mask}) + } + return ipNets, nil +} + +func isIPWithinAllSubnets(ipNets []*net.IPNet, ip net.IP) bool { + if len(ipNets) == 0 { + return false + } + for _, ipNet := range ipNets { + if !ipNet.Contains(ip) { + return false + } + } + return true +} + +func (pia *primaryIPAllocator) IncrementAndGetNextV4(times int) (net.IP, error) { + var err error + for i := 0; i < times; i++ { + if _, err = pia.AllocateNextV4(); err != nil { + return nil, err + } + } + return pia.AllocateNextV4() +} + +func (pia *primaryIPAllocator) AllocateNextV4() (net.IP, error) { + if pia.v4 == nil { + return nil, fmt.Errorf("IPv4 is not enable ") + } + if pia.v4.net == nil { + return nil, fmt.Errorf("IPv4 is not enabled but Allocation request was called") + } + pia.mu.Lock() + defer pia.mu.Unlock() + return allocateIP(pia.nodeClient, pia.v4.AllocateNextIP) +} + +func (pia *primaryIPAllocator) IncrementAndGetNextV6(times int) (net.IP, error) { + var err error + for i := 0; i < times; i++ { + if _, err = pia.AllocateNextV6(); err != nil { + return nil, err + } + } + return pia.AllocateNextV6() +} + +func (pia primaryIPAllocator) AllocateNextV6() (net.IP, error) { + if pia.v6 == nil { + return nil, fmt.Errorf("IPv6 is not enabled but Allocation request was called") + } + if pia.v6.net == nil { + return nil, fmt.Errorf("ipv6 network is not set") + } 
+ pia.mu.Lock() + defer pia.mu.Unlock() + return allocateIP(pia.nodeClient, pia.v6.AllocateNextIP) +} + +type allocNextFn func() (net.IP, error) + +func allocateIP(nodeClient v1.NodeInterface, allocateFn allocNextFn) (net.IP, error) { + nodeList, err := nodeClient.List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list nodes: %v", err) + } + for { + nextIP, err := allocateFn() + if err != nil { + return nil, fmt.Errorf("failed to allocated next IP address: %v", err) + } + firstOctet := nextIP[len(nextIP)-1] + // skip 0 and 1 + if firstOctet == 0 || firstOctet == 1 { + continue + } + isConflict, err := isConflictWithExistingHostIPs(nodeList.Items, nextIP) + if err != nil { + return nil, fmt.Errorf("failed to determine if IP conflicts with existing IPs: %v", err) + } + if !isConflict { + return nextIP, nil + } + } +} + +func isConflictWithExistingHostIPs(nodes []corev1.Node, ip net.IP) (bool, error) { + ipStr := ip.String() + for _, node := range nodes { + nodeIPsSet, err := util.ParseNodeHostCIDRsDropNetMask(&node) + if err != nil { + return false, fmt.Errorf("failed to parse node %s primary annotation info: %v", node.Name, err) + } + if nodeIPsSet.Has(ipStr) { + return true, nil + } + } + return false, nil +} diff --git a/test/e2e/ipalloc/primaryipalloc_test.go b/test/e2e/ipalloc/primaryipalloc_test.go new file mode 100644 index 0000000000..815915b7ea --- /dev/null +++ b/test/e2e/ipalloc/primaryipalloc_test.go @@ -0,0 +1,177 @@ +package ipalloc + +import ( + "fmt" + "net" + "strings" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + utilsnet "k8s.io/utils/net" +) + +func TestUtilSuite(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, "node ip alloc suite") +} + +func TestAllocateNext(t *testing.T) { + tests := []struct { + desc string + input *net.IPNet + output []net.IP + }{ + { + desc: "increments IPv4 address", + input: mustParseCIDRIncIP("192.168.1.5/16"), // mask /24 would fail + output: []net.IP{net.ParseIP("192.168.1.6"), net.ParseIP("192.168.1.7"), net.ParseIP("192.168.1.8")}, + }, + { + desc: "increments IPv6 address", + input: mustParseCIDRIncIP("fc00:f853:ccd:e793::6/64"), + output: []net.IP{net.ParseIP("fc00:f853:ccd:e793::7"), net.ParseIP("fc00:f853:ccd:e793::8"), net.ParseIP("fc00:f853:ccd:e793::9")}, + }, + } + + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + nodeIPAlloc := newIPAllocator(tc.input) + for _, expectedIP := range tc.output { + allocatedIP, err := nodeIPAlloc.AllocateNextIP() + if err != nil { + t.Errorf("failed to allocated next IP: %v", err) + } + if !allocatedIP.Equal(expectedIP) { + t.Errorf("Expected IP %q, but got %q", expectedIP.String(), allocatedIP.String()) + } + } + }) + } +} + +// mustParseCIDRIncIP parses the IP and CIDR. It adds the IP to the returned IPNet. 
+func mustParseCIDRIncIP(cidr string) *net.IPNet { + ip, ipNet, err := net.ParseCIDR(cidr) + if err != nil { + panic(fmt.Sprintf("failed to parse CIDR %q: %v", cidr, err)) + } + ipNet.IP = ip + return ipNet +} + +type network struct { + ip string + mask string +} + +type node struct { + v4 network + v6 network +} + +func TestIPAlloc(t *testing.T) { + tests := []struct { + desc string + existingPrimaryNodeIPs []node + expectedFromAllocateNext []string + }{ + { + desc: "IPv4", + existingPrimaryNodeIPs: []node{{v4: network{ip: "192.168.1.1", mask: "16"}}, {v4: network{ip: "192.168.1.2", mask: "16"}}}, + expectedFromAllocateNext: []string{"192.168.2.3", "192.168.2.4"}, + }, + { + desc: "IPv6", + existingPrimaryNodeIPs: []node{{v4: network{ip: "fc00:f853:ccd:e793::5", mask: "64"}}, {v4: network{ip: "fc00:f853:ccd:e793::6", mask: "64"}}}, + expectedFromAllocateNext: []string{"fc00:f853:ccd:e793::8", "fc00:f853:ccd:e793::9"}, + }, + } + + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + cs := fake.NewSimpleClientset(getNodesWithIPs(tc.existingPrimaryNodeIPs)) + pipa, err := newPrimaryIPAllocator(cs.CoreV1().Nodes()) + if err != nil { + t.Errorf(err.Error()) + return + } + for _, expectedIPStr := range tc.expectedFromAllocateNext { + expectedIP := net.ParseIP(expectedIPStr) + var nextIP net.IP + var err error + if utilsnet.IsIPv6(expectedIP) { + nextIP, err = pipa.AllocateNextV6() + } else { + nextIP, err = pipa.AllocateNextV4() + } + if err != nil || nextIP == nil { + t.Errorf("failed to allocated next IPv4 or IPv6 address. err %v", err) + return + } + if !nextIP.Equal(expectedIP) { + t.Errorf("expected IP %q, but found %q", expectedIP, nextIP) + } + } + }) + } + +} + +func getNodesWithIPs(nodesSpec []node) runtime.Object { + nodeObjs := make([]corev1.Node, 0, len(nodesSpec)) + getIPMaskFn := func(ip, mask string) string { + if ip == "" || mask == "" { + return "" + } + return fmt.Sprintf("%s/%s", ip, mask) + } + + getArrayForHostCIDRs := func(n node) string { + cidrs := []string{} + if cidr := getIPMaskFn(n.v4.ip, n.v4.mask); cidr != "" { + cidrs = append(cidrs, fmt.Sprintf("\"%s\"", cidr)) + } + if cidr := getIPMaskFn(n.v6.ip, n.v6.mask); cidr != "" { + cidrs = append(cidrs, fmt.Sprintf("\"%s\"", cidr)) + } + return fmt.Sprintf("[%s]", strings.Join(cidrs, ",")) + } + + for i, node := range nodesSpec { + nodePrimaryIfAddrValue := fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", + getIPMaskFn(node.v4.ip, node.v4.mask), getIPMaskFn(node.v6.ip, node.v6.mask)) + node1Annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": nodePrimaryIfAddrValue, + util.OVNNodeHostCIDRs: getArrayForHostCIDRs(node), + } + nodeObjs = append(nodeObjs, getNodeObj(fmt.Sprintf("node%d", i), node1Annotations, map[string]string{})) + } + nl := &corev1.NodeList{Items: nodeObjs} + return nl +} + +func getNodeObj(nodeName string, annotations, labels map[string]string) corev1.Node { + return corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Annotations: annotations, + Labels: labels, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } +} diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 928d81bc79..aa0a6a246c 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -18,9 +18,15 @@ import ( "gopkg.in/yaml.v2" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + rav1 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1" + crdtypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" + udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" "github.com/ovn-org/ovn-kubernetes/test/e2e/diagnostics" "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" "github.com/ovn-org/ovn-kubernetes/test/e2e/kubevirt" corev1 "k8s.io/api/core/v1" @@ -65,24 +71,25 @@ func newControllerRuntimeClient() (crclient.Client, error) { return nil, err } scheme := runtime.NewScheme() - err = kubevirtv1.AddToScheme(scheme) - if err != nil { + if err := kubevirtv1.AddToScheme(scheme); err != nil { return nil, err } - err = kvmigrationsv1alpha1.AddToScheme(scheme) - if err != nil { + if err := kvmigrationsv1alpha1.AddToScheme(scheme); err != nil { return nil, err } - err = ipamclaimsv1alpha1.AddToScheme(scheme) - if err != nil { + if err := ipamclaimsv1alpha1.AddToScheme(scheme); err != nil { return nil, err } - err = nadv1.AddToScheme(scheme) - if err != nil { + if err := nadv1.AddToScheme(scheme); err != nil { return nil, err } - err = corev1.AddToScheme(scheme) - if err != nil { + if err := corev1.AddToScheme(scheme); err != nil { + return nil, err + } + if err := udnv1.AddToScheme(scheme); err != nil { + return nil, err + } + if err := rav1.AddToScheme(scheme); err != nil { return nil, err } return crclient.New(config, crclient.Options{ @@ -96,12 +103,14 @@ var _ = Describe("Kubevirt Virtual Machines", func() { d = diagnostics.New(fr) crClient crclient.Client namespace string + iperf3DefaultPort = int32(5201) tcpServerPort = int32(9900) wg sync.WaitGroup selectedNodes = []corev1.Node{} httpServerTestPods = []*corev1.Pod{} iperfServerTestPods = []*corev1.Pod{} clientSet kubernetes.Interface + providerCtx infraapi.Context // Systemd resolvd prevent resolving kube api service by fqdn, so // we replace it here with NetworkManager @@ -134,6 +143,8 @@ var _ = Describe("Kubevirt Virtual Machines", func() { shouldExpectFailure bool } + type execFnType = func(cmd string) (string, error) + var ( sendEcho = func(conn *net.TCPConn) error { strEcho := "Halo" @@ -196,8 +207,11 @@ var _ = Describe("Kubevirt Virtual Machines", func() { return conn, nil } - dialServiceNodePort = func(svc *corev1.Service) ([]*net.TCPConn, error) { - worker, err := fr.ClientSet.CoreV1().Nodes().Get(context.TODO(), "ovn-worker", metav1.GetOptions{}) + dialServiceNodePort = func(client kubernetes.Interface, svc *corev1.Service) ([]*net.TCPConn, error) { + worker, err := e2enode.GetRandomReadySchedulableNode(context.TODO(), client) + if err != nil { + return nil, fmt.Errorf("failed to find ready and schedulable node: %v", err) + } if err != nil { return nil, err } @@ -205,7 +219,7 @@ var _ = Describe("Kubevirt Virtual Machines", func() { nodePort := fmt.Sprintf("%d", svc.Spec.Ports[0].NodePort) port := fmt.Sprintf("%d", svc.Spec.Ports[0].Port) - d.TCPDumpDaemonSet([]string{"any", "eth0", "breth0"}, fmt.Sprintf("port %s or port %s", port, nodePort)) + d.TCPDumpDaemonSet([]string{"any", deploymentconfig.Get().PrimaryInterfaceName(), deploymentconfig.Get().ExternalBridgeName()}, fmt.Sprintf("port %s or port %s", port, nodePort)) for _, address := range worker.Status.Addresses { if address.Type != 
corev1.NodeHostName { addr := net.JoinHostPort(address.Address, nodePort) @@ -352,37 +366,77 @@ var _ = Describe("Kubevirt Virtual Machines", func() { } } } - - startNorthSouthIngressIperfTraffic = func(containerName string, addresses []string, port int32, stage string) error { + startNorthSouthIperfTraffic = func(execFn execFnType, addresses []string, port int32, logPrefix, stage string) error { GinkgoHelper() Expect(addresses).NotTo(BeEmpty()) for _, address := range addresses { - iperfLogFile := fmt.Sprintf("/tmp/ingress_test_%[1]s_%[2]d_iperf3.log", address, port) - output, err := runCommand(containerRuntime, "exec", containerName, "bash", "-c", fmt.Sprintf(` -iperf3 -c %[1]s -p %[2]d -killall iperf3 -rm -f %[3]s -iperf3 -t 0 -c %[1]s -p %[2]d --logfile %[3]s & -`, address, port, iperfLogFile)) + iperfLogFile := fmt.Sprintf("/tmp/%s_test_%s_%d_iperf3.log", logPrefix, address, port) + By(fmt.Sprintf("remove iperf3 log for %s: %s", address, stage)) + output, err := execFn(fmt.Sprintf("rm -f %s", iperfLogFile)) + if err != nil { + return fmt.Errorf("failed removing iperf3 log file %s: %w", output, err) + } + + By(fmt.Sprintf("check iperf3 connectivity for %s: %s", address, stage)) + output, err = execFn(fmt.Sprintf("iperf3 -c %s -p %d", address, port)) + if err != nil { + return fmt.Errorf("failed checking iperf3 connectivity %s: %w", output, err) + } + + By(fmt.Sprintf("start from %s: %s", address, stage)) + output, err = execFn(fmt.Sprintf("nohup iperf3 -t 0 -c %[1]s -p %[2]d --logfile %[3]s &", address, port, iperfLogFile)) if err != nil { - return fmt.Errorf("%s: %w", output, err) + return fmt.Errorf("failed at starting iperf3 in background %s: %w", output, err) } } return nil } + startNorthSouthIngressIperfTraffic = func(containerName string, addresses []string, port int32, stage string) error { + GinkgoHelper() + execFn := func(cmd string) (string, error) { + return infraprovider.Get().ExecExternalContainerCommand(infraapi.ExternalContainer{Name: containerName}, []string{"bash", "-c", cmd}) + } + return startNorthSouthIperfTraffic(execFn, addresses, port, "ingress", stage) + } + + startNorthSouthEgressIperfTraffic = func(vmi *kubevirtv1.VirtualMachineInstance, addresses []string, port int32, stage string) error { + GinkgoHelper() + execFn := func(cmd string) (string, error) { + return kubevirt.RunCommand(vmi, cmd, 5*time.Second) + } + return startNorthSouthIperfTraffic(execFn, addresses, port, "egress", stage) + } + checkNorthSouthIngressIperfTraffic = func(containerName string, addresses []string, port int32, stage string) { GinkgoHelper() Expect(addresses).NotTo(BeEmpty()) for _, ip := range addresses { iperfLogFile := fmt.Sprintf("/tmp/ingress_test_%s_%d_iperf3.log", ip, port) execFn := func(cmd string) (string, error) { - return runCommand(containerRuntime, "exec", containerName, "bash", "-c", cmd) + return infraprovider.Get().ExecExternalContainerCommand(infraapi.ExternalContainer{Name: containerName}, []string{"bash", "-c", cmd}) } checkIperfTraffic(iperfLogFile, execFn, stage) } } + checkNorthSouthEgressIperfTraffic = func(vmi *kubevirtv1.VirtualMachineInstance, addresses []string, port int32, stage string) { + GinkgoHelper() + Expect(addresses).NotTo(BeEmpty()) + for _, ip := range addresses { + if ip == "" { + continue + } + for _, ip := range addresses { + iperfLogFile := fmt.Sprintf("/tmp/egress_test_%s_%d_iperf3.log", ip, port) + execFn := func(cmd string) (string, error) { + return kubevirt.RunCommand(vmi, cmd, 5*time.Second) + } + checkIperfTraffic(iperfLogFile, 
execFn, stage) + } + } + } + checkNorthSouthEgressICMPTraffic = func(vmi *kubevirtv1.VirtualMachineInstance, addresses []string, stage string) { GinkgoHelper() Expect(addresses).NotTo(BeEmpty()) @@ -926,20 +980,6 @@ iperf3 -t 0 -c %[1]s -p %[2]d --logfile %[3]s & return generateVM(fedoraWithTestToolingVMI(labels, annotations, nodeSelector, networkSource, userData, networkData)) } - fedoraVMI = func(labels map[string]string, annotations map[string]string, nodeSelector map[string]string, networkSource kubevirtv1.NetworkSource, userData, networkData string) *kubevirtv1.VirtualMachineInstance { - cloudInitVolumeSource := kubevirtv1.VolumeSource{ - CloudInitNoCloud: &kubevirtv1.CloudInitNoCloudSource{ - UserData: userData, - NetworkData: networkData, - }, - } - return generateVMI(labels, annotations, nodeSelector, networkSource, cloudInitVolumeSource, kubevirt.FedoraContainerDiskImage) - } - - fedoraVM = func(labels map[string]string, annotations map[string]string, nodeSelector map[string]string, networkSource kubevirtv1.NetworkSource, userData, networkData string) *kubevirtv1.VirtualMachine { - return generateVM(fedoraVMI(labels, annotations, nodeSelector, networkSource, userData, networkData)) - } - composeDefaultNetworkLiveMigratableVM = func(labels map[string]string, butane string) (*kubevirtv1.VirtualMachine, error) { annotations := map[string]string{ "kubevirt.io/allow-pod-bridge-network-live-migration": "", @@ -1041,7 +1081,7 @@ passwd: By("Wait some time for service to settle") endpoints := []*net.TCPConn{} Eventually(func() error { - endpoints, err = dialServiceNodePort(svc) + endpoints, err = dialServiceNodePort(clientSet, svc) return err }).WithPolling(3*time.Second).WithTimeout(60*time.Second).Should(Succeed(), "Should dial service port once service settled") @@ -1096,15 +1136,33 @@ passwd: } iperfServerScript = ` -dnf install -y psmisc procps +#!/bin/bash -xe iface=$(ifconfig |grep flags |grep -v "eth0\|lo" | sed "s/: .*//") +iface=${iface:-eth0} + ipv4=$(ifconfig $iface | grep "inet "|awk '{print $2}'| sed "s#/.*##") -ipv6=$(ifconfig $iface | grep inet6 |grep -v fe80 |awk '{print $2}'| sed "s#/.*##") if [ "$ipv4" != "" ]; then iperf3 -s -D --bind $ipv4 --logfile /tmp/test_${ipv4}_iperf3.log + sleep 1 + if grep "iperf3: error" /tmp/test_${ipv4}_iperf3.log; then + cat /tmp/test_${ipv4}_iperf3.log + exit 1 + fi fi + +cnt=0 +while [ "$ipv6" == "" -a $cnt -lt 10 ]; do + ipv6=$(ifconfig $iface | grep inet6 |grep -v fe80 |awk '{print $2}'| sed "s#/.*##") + sleep 1 + cnt=$((cnt+1)) +done if [ "$ipv6" != "" ]; then iperf3 -s -D --bind $ipv6 --logfile /tmp/test_${ipv6}_iperf3.log + sleep 1 + if grep "iperf3: error" /tmp/test_${ipv6}_iperf3.log; then + cat /tmp/test_${ipv6}_iperf3.log 1>&2 + exit 1 + fi fi ` nextIPs = func(idx int, subnets []string) ([]string, error) { @@ -1123,17 +1181,17 @@ fi return ips, nil } - createIperfServerPods = func(nodes []corev1.Node, netConfig networkAttachmentConfig, staticSubnets []string) ([]*corev1.Pod, error) { + createIperfServerPods = func(nodes []corev1.Node, udnName string, role udnv1.NetworkRole, staticSubnets []string) ([]*corev1.Pod, error) { var pods []*corev1.Pod for i, node := range nodes { var nse *nadapi.NetworkSelectionElement - if netConfig.role != "primary" { + if role != udnv1.NetworkRolePrimary { staticIPs, err := nextIPs(i, staticSubnets) if err != nil { return nil, err } nse = &nadapi.NetworkSelectionElement{ - Name: netConfig.name, + Name: udnName, IPRequest: staticIPs, } } @@ -1141,7 +1199,7 @@ fi if nse != nil { pod.Annotations = 
networkSelectionElements(*nse) } - pod.Spec.Containers[0].Image = iperf3Image + pod.Spec.Containers[0].Image = images.IPerf3() pod.Spec.Containers[0].Args = []string{iperfServerScript + "\n sleep infinity"} }) @@ -1186,8 +1244,9 @@ fi removeImagesInNode = func(node, imageURL string) error { By("Removing unused images in node " + node) - output, err := runCommand(containerRuntime, "exec", node, - "crictl", "images", "-o", "json") + output, err := infraprovider.Get().ExecK8NodeCommand(node, []string{ + "crictl", "images", "-o", "json", + }) if err != nil { return err } @@ -1199,13 +1258,15 @@ fi return err } if imageID != "" { - _, err = runCommand(containerRuntime, "exec", node, - "crictl", "rmi", imageID) + _, err = infraprovider.Get().ExecK8NodeCommand(node, []string{ + "crictl", "rmi", imageID, + }) if err != nil { return err } - _, err = runCommand(containerRuntime, "exec", node, - "crictl", "rmi", "--prune") + _, err = infraprovider.Get().ExecK8NodeCommand(node, []string{ + "crictl", "rmi", "--prune", + }) if err != nil { return err } @@ -1225,18 +1286,39 @@ fi return nil } - createIperfExternalContainer = func(name string) (string, string) { - return createClusterExternalContainer( - name, - iperf3Image, - []string{"--network", "kind", "--entrypoint", "/bin/bash"}, - []string{"-c", "sleep infinity"}, - ) + createCUDN = func(cudn *udnv1.ClusterUserDefinedNetwork) { + GinkgoHelper() + By("Creating ClusterUserDefinedNetwork") + Expect(crClient.Create(context.Background(), cudn)).To(Succeed()) + DeferCleanup(func() { + if e2eframework.TestContext.DeleteNamespace && (e2eframework.TestContext.DeleteNamespaceOnFailure || !CurrentSpecReport().Failed()) { + crClient.Delete(context.Background(), cudn) + } + }) + Eventually(clusterUserDefinedNetworkReadyFunc(fr.DynamicClient, cudn.Name), 5*time.Second, time.Second).Should(Succeed()) + } + + createRA = func(ra *rav1.RouteAdvertisements) { + GinkgoHelper() + By("Creating RouteAdvertisements") + Expect(crClient.Create(context.Background(), ra)).To(Succeed()) + DeferCleanup(func() { + if e2eframework.TestContext.DeleteNamespace && (e2eframework.TestContext.DeleteNamespaceOnFailure || !CurrentSpecReport().Failed()) { + crClient.Delete(context.Background(), ra) + } + }) + + By("ensure route advertisement matching CUDN was created successfully") + Eventually(func(g Gomega) string { + Expect(crClient.Get(context.TODO(), crclient.ObjectKeyFromObject(ra), ra)).To(Succeed()) + return ra.Status.Status + }, 30*time.Second, time.Second).Should(Equal("Accepted")) } ) BeforeEach(func() { // So we can use it at AfterEach, since fr.ClientSet is nil there clientSet = fr.ClientSet + providerCtx = infraprovider.Get().NewTestContext() var err error crClient, err = newControllerRuntimeClient() @@ -1431,7 +1513,7 @@ fi cmd func() string } var ( - nad *nadv1.NetworkAttachmentDefinition + cudn *udnv1.ClusterUserDefinedNetwork vm *kubevirtv1.VirtualMachine vmi *kubevirtv1.VirtualMachineInstance cidrIPv4 = "10.128.0.0/24" @@ -1461,7 +1543,18 @@ fi liveMigrateFailed(vmi) }, } - networkData = `version: 2 + // For secondary network interfaces: + // - DHCPv6 cannot be activated because KubeVirt does not support it. + // - In Fedora 39, the "may-fail" option is configured by cloud-init in + // NetworkManager. This causes the entire interface to remain inactive + // if no IPv6 address is assigned. 
+ networkDataIPv4 = `version: 2 +ethernets: + eth0: + dhcp4: true +` + + networkDataDualStack = `version: 2 ethernets: eth0: dhcp4: true @@ -1474,24 +1567,21 @@ chpasswd: { expire: False } ` userDataWithIperfServer = userData + fmt.Sprintf(` -packages: - - iperf3 write_files: - path: /tmp/iperf-server.sh encoding: b64 content: %s permissions: '0755' -runcmd: -- /tmp/iperf-server.sh`, base64.StdEncoding.EncodeToString([]byte(iperfServerScript))) +`, base64.StdEncoding.EncodeToString([]byte(iperfServerScript))) virtualMachine = resourceCommand{ description: "VirtualMachine", cmd: func() string { vm = fedoraWithTestToolingVM(nil /*labels*/, nil /*annotations*/, nil /*nodeSelector*/, kubevirtv1.NetworkSource{ Multus: &kubevirtv1.MultusNetwork{ - NetworkName: nad.Name, + NetworkName: cudn.Name, }, - }, userData, networkData) + }, userData, networkDataIPv4) createVirtualMachine(vm) return vm.Name }, @@ -1500,10 +1590,10 @@ runcmd: virtualMachineWithUDN = resourceCommand{ description: "VirtualMachine with interface binding for UDN", cmd: func() string { - vm = fedoraVM(nil /*labels*/, nil /*annotations*/, nil, /*nodeSelector*/ + vm = fedoraWithTestToolingVM(nil /*labels*/, nil /*annotations*/, nil, /*nodeSelector*/ kubevirtv1.NetworkSource{ Pod: &kubevirtv1.PodNetwork{}, - }, userDataWithIperfServer, networkData) + }, userDataWithIperfServer, networkDataDualStack) vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Bridge = nil vm.Spec.Template.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} createVirtualMachine(vm) @@ -1516,9 +1606,9 @@ runcmd: cmd: func() string { vmi = fedoraWithTestToolingVMI(nil /*labels*/, nil /*annotations*/, nil /*nodeSelector*/, kubevirtv1.NetworkSource{ Multus: &kubevirtv1.MultusNetwork{ - NetworkName: nad.Name, + NetworkName: cudn.Name, }, - }, userData, networkData) + }, userData, networkDataIPv4) createVirtualMachineInstance(vmi) return vmi.Name }, @@ -1527,10 +1617,10 @@ runcmd: virtualMachineInstanceWithUDN = resourceCommand{ description: "VirtualMachineInstance with interface binding for UDN", cmd: func() string { - vmi = fedoraVMI(nil /*labels*/, nil /*annotations*/, nil, /*nodeSelector*/ + vmi = fedoraWithTestToolingVMI(nil /*labels*/, nil /*annotations*/, nil, /*nodeSelector*/ kubevirtv1.NetworkSource{ Pod: &kubevirtv1.PodNetwork{}, - }, userDataWithIperfServer, networkData) + }, userDataWithIperfServer, networkDataDualStack) vmi.Spec.Domain.Devices.Interfaces[0].Bridge = nil vmi.Spec.Domain.Devices.Interfaces[0].Binding = &kubevirtv1.PluginBinding{Name: "l2bridge"} createVirtualMachineInstance(vmi) @@ -1558,11 +1648,38 @@ runcmd: description string resource resourceCommand test testCommand - topology string - role string + topology udnv1.NetworkTopology + role udnv1.NetworkRole + ingress string } + var ( + containerNetwork = func(td testData) string { + if td.ingress == "routed" { + return "bgpnet" + } + return "kind" + } + exposeVMIperfServer = func(td testData, vmi *kubevirtv1.VirtualMachineInstance, vmiAddresses []string) ([]string, int32) { + GinkgoHelper() + if td.ingress == "routed" { + return vmiAddresses, iperf3DefaultPort + } + step := by(vmi.Name, "Expose VM iperf server as a service") + svc, err := fr.ClientSet.CoreV1().Services(namespace).Create(context.TODO(), composeService("iperf3-vm-server", vmi.Name, iperf3DefaultPort), metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(svc.Spec.Ports[0].NodePort).NotTo(Equal(0), step) + serverPort := svc.Spec.Ports[0].NodePort + nodes, err := 
e2enode.GetBoundedReadySchedulableNodes(context.TODO(), fr.ClientSet, 1) + Expect(err).NotTo(HaveOccurred()) + serverIPs := e2enode.CollectAddresses(nodes, v1.NodeInternalIP) + return serverIPs, serverPort + } + ) DescribeTable("should keep ip", func(td testData) { - if td.role == "primary" && !isInterconnectEnabled() { + if td.role == "" { + td.role = udnv1.NetworkRoleSecondary + } + if td.role == udnv1.NetworkRolePrimary && !isInterconnectEnabled() { const upstreamIssue = "https://github.com/ovn-org/ovn-kubernetes/issues/4528" e2eskipper.Skipf( "The egress check of tests are known to fail on non-IC deployments. Upstream issue: %s", upstreamIssue, @@ -1572,7 +1689,7 @@ runcmd: l := map[string]string{ "e2e-framework": fr.BaseName, } - if td.role == "primary" { + if td.role == udnv1.NetworkRolePrimary { l[RequiredUDNNamespaceLabel] = "" } ns, err := fr.CreateNamespace(context.TODO(), fr.BaseName, l) @@ -1580,17 +1697,11 @@ runcmd: fr.Namespace = ns namespace = fr.Namespace.Name - netConfig := newNetworkAttachmentConfig( - networkAttachmentConfigParams{ - namespace: namespace, - name: "net1", - topology: td.topology, - cidr: correctCIDRFamily(cidrIPv4, cidrIPv6), - allowPersistentIPs: true, - role: td.role, - }) + networkName := "" + cidrs := generateL2Subnets(cidrIPv4, cidrIPv6) + cudn, networkName = kubevirt.GenerateCUDN(namespace, "net1", td.topology, td.role, cidrs) - if td.topology == "localnet" { + if td.topology == udnv1.NetworkTopologyLocalnet { By("setting up the localnet underlay") nodes := ovsPods(clientSet) Expect(nodes).NotTo(BeEmpty()) @@ -1602,27 +1713,79 @@ runcmd: }) const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, networkName, 0 /*vlanID*/)).To(Succeed()) + } + createCUDN(cudn) + + if td.ingress == "routed" { + createRA(&rav1.RouteAdvertisements{ + ObjectMeta: metav1.ObjectMeta{ + Name: cudn.Name, + }, + Spec: rav1.RouteAdvertisementsSpec{ + Advertisements: []rav1.AdvertisementType{rav1.PodNetwork}, + NetworkSelectors: crdtypes.NetworkSelectors{{ + NetworkSelectionType: crdtypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &crdtypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"name": cudn.Name}, + }, + }, + }}, + }, + }) } - By("Creating NetworkAttachmentDefinition") - nad = generateNAD(netConfig) - Expect(crClient.Create(context.Background(), nad)).To(Succeed()) workerNodeList, err := fr.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{LabelSelector: labels.FormatLabels(map[string]string{"node-role.kubernetes.io/worker": ""})}) Expect(err).NotTo(HaveOccurred()) selectedNodes = workerNodeList.Items Expect(selectedNodes).NotTo(BeEmpty()) - iperfServerTestPods, err = createIperfServerPods(selectedNodes, netConfig, []string{}) + iperfServerTestPods, err = createIperfServerPods(selectedNodes, cudn.Name, td.role, []string{}) Expect(err).NotTo(HaveOccurred()) + network, err := infraprovider.Get().PrimaryNetwork() + Expect(err).ShouldNot(HaveOccurred(), "primary network must be available to attach containers") + if containerNetwork := containerNetwork(td); containerNetwork != network.Name() { + network, err = infraprovider.Get().GetNetwork(containerNetwork) + Expect(err).ShouldNot(HaveOccurred(), "must to get alternative network") + } + externalContainerPort := infraprovider.Get().GetExternalContainerPort() 
externalContainerName := namespace + "-iperf" - externalContainerIPV4Address, externalContainerIPV6Address := createIperfExternalContainer(externalContainerName) - DeferCleanup(func() { - if e2eframework.TestContext.DeleteNamespace && (e2eframework.TestContext.DeleteNamespaceOnFailure || !CurrentSpecReport().Failed()) { - deleteClusterExternalContainer(externalContainerName) - } - }) + externalContainerSpec := infraapi.ExternalContainer{ + Name: externalContainerName, + Image: images.IPerf3(), + Network: network, + Args: []string{"sleep infinity"}, + ExtPort: externalContainerPort, + } + externalContainer, err := providerCtx.CreateExternalContainer(externalContainerSpec) + Expect(err).ShouldNot(HaveOccurred(), "creation of external container is test dependency") + + var externalContainerIPs []string + if externalContainer.IsIPv4() { + externalContainerIPs = append(externalContainerIPs, externalContainer.IPv4) + } + if externalContainer.IsIPv6() { + externalContainerIPs = append(externalContainerIPs, externalContainer.IPv6) + } + + if td.ingress == "routed" { + // pre=created test dependency and therefore we dont delete + frrExternalContainer := infraapi.ExternalContainer{Name: "frr"} + frrNetwork, err := infraprovider.Get().GetNetwork(containerNetwork(td)) + Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("failed to fetch network %q: %v", containerNetwork(td), err)) + frrExternalContainerInterface, err := infraprovider.Get().GetExternalContainerNetworkInterface(frrExternalContainer, frrNetwork) + Expect(err).NotTo(HaveOccurred(), "must fetch FRR container network interface attached to secondary network") + + output, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"bash", "-c", fmt.Sprintf(` +set -xe +dnf install -y iproute +ip route add %[1]s via %[2]s +ip route add %[3]s via %[4]s +`, cidrIPv4, frrExternalContainerInterface.GetIPv4(), cidrIPv6, frrExternalContainerInterface.GetIPv6())}) + Expect(err).NotTo(HaveOccurred(), output) + } vmiName := td.resource.cmd() vmi = &kubevirtv1.VirtualMachineInstance{ @@ -1643,24 +1806,18 @@ runcmd: WithPolling(time.Second). Should(Succeed(), step) - step = by(vmi.Name, "Wait for cloud init to finish at first boot") - output, err := kubevirt.RunCommand(vmi, "cloud-init status --wait", time.Minute) - Expect(err).NotTo(HaveOccurred(), step+": "+output) - // expect 2 addresses on dual-stack deployments; 1 on single-stack step = by(vmi.Name, "Wait for addresses at the virtual machine") - expectedNumberOfAddresses := len(strings.Split(netConfig.cidr, ",")) + expectedNumberOfAddresses := len(cidrs) expectedAddreses := virtualMachineAddressesFromStatus(vmi, expectedNumberOfAddresses) expectedAddresesAtGuest := expectedAddreses - testPodsIPs := podsMultusNetworkIPs(iperfServerTestPods, podNetworkStatusByNetConfigPredicate(netConfig)) + testPodsIPs := podsMultusNetworkIPs(iperfServerTestPods, podNetworkStatusByNetConfigPredicate(namespace, cudn.Name, strings.ToLower(string(td.role)))) - step = by(vmi.Name, "Expose VM iperf server as a service") - svc, err := fr.ClientSet.CoreV1().Services(namespace).Create(context.TODO(), composeService("iperf3-vm-server", vmi.Name, 5201), metav1.CreateOptions{}) - Expect(svc.Spec.Ports[0].NodePort).NotTo(Equal(0), step) + serverIPs, serverPort := exposeVMIperfServer(td, vmi, expectedAddreses) // IPv6 is not support for secondaries with IPAM so guest will // have only ipv4. 
- if td.role != "primary" { + if td.role != udnv1.NetworkRolePrimary { expectedAddresesAtGuest, err = util.MatchAllIPStringFamily(false /*ipv4*/, expectedAddreses) Expect(err).NotTo(HaveOccurred()) testPodsIPs = filterOutIPv6(testPodsIPs) @@ -1677,19 +1834,14 @@ runcmd: Expect(startEastWestIperfTraffic(vmi, testPodsIPs, step)).To(Succeed(), step) checkEastWestIperfTraffic(vmi, testPodsIPs, step) - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), fr.ClientSet, 1) - Expect(err).NotTo(HaveOccurred()) - - nodeIPs := e2enode.CollectAddresses(nodes, v1.NodeInternalIP) - - if td.role == "primary" { + if td.role == udnv1.NetworkRolePrimary { if isIPv6Supported() && isInterconnectEnabled() { step = by(vmi.Name, fmt.Sprintf("Checking IPv6 gateway before %s %s", td.resource.description, td.test.description)) nodeRunningVMI, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.NodeName, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred(), step) - expectedIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(nodeRunningVMI, netConfig.networkName) + expectedIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(nodeRunningVMI, networkName) Expect(err).NotTo(HaveOccurred()) Eventually(kubevirt.RetrieveIPv6Gateways). WithArguments(vmi). @@ -1698,9 +1850,23 @@ runcmd: Should(Equal([]string{expectedIPv6GatewayPath}), "should filter remote ipv6 gateway nexthop") } step = by(vmi.Name, fmt.Sprintf("Check north/south traffic before %s %s", td.resource.description, td.test.description)) - startNorthSouthIngressIperfTraffic(externalContainerName, nodeIPs, svc.Spec.Ports[0].NodePort, step) - checkNorthSouthIngressIperfTraffic(externalContainerName, nodeIPs, svc.Spec.Ports[0].NodePort, step) - checkNorthSouthEgressICMPTraffic(vmi, []string{externalContainerIPV4Address, externalContainerIPV6Address}, step) + output, err := kubevirt.RunCommand(vmi, "/tmp/iperf-server.sh", time.Minute) + Expect(err).NotTo(HaveOccurred(), step+": "+output) + Expect(startNorthSouthIngressIperfTraffic(externalContainerName, serverIPs, serverPort, step)).To(Succeed()) + checkNorthSouthIngressIperfTraffic(externalContainerName, serverIPs, serverPort, step) + checkNorthSouthEgressICMPTraffic(vmi, externalContainerIPs, step) + if td.ingress == "routed" { + _, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"bash", "-c", iperfServerScript}) + Expect(err).NotTo(HaveOccurred(), step) + Expect(startNorthSouthEgressIperfTraffic(vmi, externalContainerIPs, iperf3DefaultPort, step)).To(Succeed()) + By("Check egress src ip is not node IP on 'routed' ingress mode") + for _, vmAddress := range expectedAddreses { + output, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{ + "bash", "-c", fmt.Sprintf("grep 'connected to %s' /tmp/test_*", vmAddress)}) + Expect(err).NotTo(HaveOccurred(), step+": "+output) + } + checkNorthSouthEgressIperfTraffic(vmi, externalContainerIPs, iperf3DefaultPort, step) + } } by(vmi.Name, fmt.Sprintf("Running %s for %s", td.test.description, td.resource.description)) @@ -1709,12 +1875,6 @@ runcmd: step = by(vmi.Name, fmt.Sprintf("Login to virtual machine after %s %s", td.resource.description, td.test.description)) Expect(kubevirt.LoginToFedora(vmi, "fedora", "fedora")).To(Succeed(), step) - if td.test.description == restart.description { - step := by(vmi.Name, "Wait for cloud init to finish after restart") - output, err = kubevirt.RunCommand(vmi, "cloud-init status --wait", time.Minute) - 
Expect(err).NotTo(HaveOccurred(), step+": "+output) - } - obtainedAddresses := virtualMachineAddressesFromStatus(vmi, expectedNumberOfAddresses) Expect(obtainedAddresses).To(Equal(expectedAddreses)) @@ -1726,21 +1886,25 @@ runcmd: step = by(vmi.Name, fmt.Sprintf("Check east/west traffic after %s %s", td.resource.description, td.test.description)) if td.test.description == restart.description { - // At restart we need re-connect Expect(startEastWestIperfTraffic(vmi, testPodsIPs, step)).To(Succeed(), step) - if td.role == "primary" { - startNorthSouthIngressIperfTraffic(externalContainerName, nodeIPs, svc.Spec.Ports[0].NodePort, step) + if td.role == udnv1.NetworkRolePrimary { + output, err := kubevirt.RunCommand(vmi, "/tmp/iperf-server.sh &", time.Minute) + Expect(err).NotTo(HaveOccurred(), step+": "+output) + Expect(startNorthSouthIngressIperfTraffic(externalContainerName, serverIPs, serverPort, step)).To(Succeed()) } } checkEastWestIperfTraffic(vmi, testPodsIPs, step) - if td.role == "primary" { + if td.role == udnv1.NetworkRolePrimary { step = by(vmi.Name, fmt.Sprintf("Check north/south traffic after %s %s", td.resource.description, td.test.description)) - checkNorthSouthIngressIperfTraffic(externalContainerName, nodeIPs, svc.Spec.Ports[0].NodePort, step) - checkNorthSouthEgressICMPTraffic(vmi, []string{externalContainerIPV4Address, externalContainerIPV6Address}, step) + checkNorthSouthIngressIperfTraffic(externalContainerName, serverIPs, serverPort, step) + checkNorthSouthEgressICMPTraffic(vmi, externalContainerIPs, step) + if td.ingress == "routed" { + checkNorthSouthEgressIperfTraffic(vmi, externalContainerIPs, iperf3DefaultPort, step) + } } - if td.role == "primary" && td.test.description == liveMigrate.description && isInterconnectEnabled() { + if td.role == udnv1.NetworkRolePrimary && td.test.description == liveMigrate.description && isInterconnectEnabled() { if isIPv4Supported() { step = by(vmi.Name, fmt.Sprintf("Checking IPv4 gateway cached mac after %s %s", td.resource.description, td.test.description)) Expect(crClient.Get(context.TODO(), crclient.ObjectKeyFromObject(vmi), vmi)).To(Succeed()) @@ -1748,7 +1912,7 @@ runcmd: targetNode, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.MigrationState.TargetNode, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred(), step) - expectedGatewayMAC, err := kubevirt.GenerateGatewayMAC(targetNode, netConfig.networkName) + expectedGatewayMAC, err := kubevirt.GenerateGatewayMAC(targetNode, networkName) Expect(err).NotTo(HaveOccurred(), step) Expect(err).NotTo(HaveOccurred(), step) @@ -1764,7 +1928,7 @@ runcmd: targetNode, err := fr.ClientSet.CoreV1().Nodes().Get(context.Background(), vmi.Status.MigrationState.TargetNode, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred(), step) - targetNodeIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(targetNode, netConfig.networkName) + targetNodeIPv6GatewayPath, err := kubevirt.GenerateGatewayIPv6RouterLLA(targetNode, networkName) Expect(err).NotTo(HaveOccurred()) Eventually(kubevirt.RetrieveIPv6Gateways). WithArguments(vmi). 
@@ -1775,70 +1939,81 @@ runcmd: } }, func(td testData) string { - role := "secondary" + role := udnv1.NetworkRoleSecondary if td.role != "" { role = td.role } - return fmt.Sprintf("after %s of %s with %s/%s", td.test.description, td.resource.description, role, td.topology) + ingress := "snat" + if td.ingress != "" { + ingress = td.ingress + } + return fmt.Sprintf("after %s of %s with %s/%s with %q ingress", td.test.description, td.resource.description, role, td.topology, ingress) }, Entry(nil, testData{ resource: virtualMachine, test: restart, - topology: "localnet", + topology: udnv1.NetworkTopologyLocalnet, }), Entry(nil, testData{ resource: virtualMachine, test: restart, - topology: "layer2", + topology: udnv1.NetworkTopologyLayer2, }), Entry(nil, testData{ resource: virtualMachineWithUDN, test: restart, - topology: "layer2", - role: "primary", + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, }), Entry(nil, testData{ resource: virtualMachine, test: liveMigrate, - topology: "localnet", + topology: udnv1.NetworkTopologyLocalnet, }), Entry(nil, testData{ resource: virtualMachine, test: liveMigrate, - topology: "layer2", + topology: udnv1.NetworkTopologyLayer2, + }), + Entry(nil, testData{ + resource: virtualMachineWithUDN, + test: liveMigrate, + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, }), Entry(nil, testData{ resource: virtualMachineWithUDN, test: liveMigrate, - topology: "layer2", - role: "primary", + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, + ingress: "routed", }), Entry(nil, testData{ resource: virtualMachineInstance, test: liveMigrate, - topology: "localnet", + topology: udnv1.NetworkTopologyLocalnet, }), Entry(nil, testData{ resource: virtualMachineInstance, test: liveMigrate, - topology: "layer2", + topology: udnv1.NetworkTopologyLayer2, }), Entry(nil, testData{ resource: virtualMachineInstanceWithUDN, test: liveMigrate, - topology: "layer2", - role: "primary", + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, }), Entry(nil, testData{ resource: virtualMachineInstanceWithUDN, test: liveMigrateFailed, - topology: "layer2", - role: "primary", + topology: udnv1.NetworkTopologyLayer2, + role: udnv1.NetworkRolePrimary, }), Entry(nil, testData{ resource: virtualMachineInstance, test: liveMigrateFailed, - topology: "localnet", + topology: udnv1.NetworkTopologyLocalnet, }), ) }) @@ -1879,18 +2054,10 @@ runcmd: }) fr.Namespace = ns namespace = fr.Namespace.Name - - netConfig := newNetworkAttachmentConfig( - networkAttachmentConfigParams{ - namespace: namespace, - name: "net1", - topology: "layer2", - cidr: correctCIDRFamily(cidrIPv4, cidrIPv6), - role: "primary", - mtu: 1300, - }) - By("Creating NetworkAttachmentDefinition") - Expect(crClient.Create(context.Background(), generateNAD(netConfig))).To(Succeed()) + cidrs := generateL2Subnets(cidrIPv4, cidrIPv6) + cudn, _ := kubevirt.GenerateCUDN(namespace, "net1", udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, cidrs) + cudn.Spec.Network.Layer2.MTU = 1300 + createCUDN(cudn) By("Create virt-launcher pod") kubevirtPod := kubevirt.GenerateFakeVirtLauncherPod(namespace, "vm1") @@ -1991,7 +2158,6 @@ runcmd: Expect(removeImagesInNodes(kubevirt.FedoraContainerDiskImage)).To(Succeed()) }) var ( - nad *nadv1.NetworkAttachmentDefinition ipv4CIDR = "10.128.0.0/24" ipv6CIDR = "2010:100:200::0/60" vmiIPv4 = "10.128.0.100/24" @@ -2025,13 +2191,6 @@ chpasswd: { expire: False } ` ) DescribeTable("should maintain tcp connection with minimal 
downtime", func(td func(vmi *kubevirtv1.VirtualMachineInstance)) { - netConfig := newNetworkAttachmentConfig( - networkAttachmentConfigParams{ - namespace: fr.Namespace.Name, - name: "net1", - topology: "localnet", - }) - By("setting up the localnet underlay") nodes := ovsPods(clientSet) Expect(nodes).NotTo(BeEmpty()) @@ -2042,18 +2201,18 @@ chpasswd: { expire: False } } }) + cudn, networkName := kubevirt.GenerateCUDN(namespace, "net1", udnv1.NetworkTopologyLocalnet, udnv1.NetworkRoleSecondary, udnv1.DualStackCIDRs{}) + createCUDN(cudn) + const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, networkName, 0 /*vlanID*/)).To(Succeed()) - By("Creating NetworkAttachmentDefinition") - nad = generateNAD(netConfig) - Expect(crClient.Create(context.Background(), nad)).To(Succeed()) workerNodeList, err := fr.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{LabelSelector: labels.FormatLabels(map[string]string{"node-role.kubernetes.io/worker": ""})}) Expect(err).NotTo(HaveOccurred()) selectedNodes = workerNodeList.Items Expect(selectedNodes).NotTo(BeEmpty()) - iperfServerTestPods, err = createIperfServerPods(selectedNodes, netConfig, cidr) + iperfServerTestPods, err = createIperfServerPods(selectedNodes, cudn.Name, cudn.Spec.Network.Localnet.Role, cidr) Expect(err).NotTo(HaveOccurred()) networkData, err := staticIPsNetworkData(selectCIDRs(vmiIPv4, vmiIPv6)) @@ -2061,7 +2220,7 @@ chpasswd: { expire: False } vmi := fedoraWithTestToolingVMI(nil /*labels*/, nil /*annotations*/, nil /*nodeSelector*/, kubevirtv1.NetworkSource{ Multus: &kubevirtv1.MultusNetwork{ - NetworkName: nad.Name, + NetworkName: cudn.Name, }, }, userData, networkData) // Harcode mac address so it's the same after live migration @@ -2083,7 +2242,7 @@ chpasswd: { expire: False } output, err := kubevirt.RunCommand(vmi, "cloud-init status --wait", time.Minute) Expect(err).NotTo(HaveOccurred(), step+": "+output) - testPodsIPs := podsMultusNetworkIPs(iperfServerTestPods, podNetworkStatusByNetConfigPredicate(netConfig)) + testPodsIPs := podsMultusNetworkIPs(iperfServerTestPods, podNetworkStatusByNetConfigPredicate(namespace, cudn.Name, strings.ToLower(string(cudn.Spec.Network.Localnet.Role)))) Expect(testPodsIPs).NotTo(BeEmpty()) step = by(vmi.Name, "Check east/west traffic before virtual machine instance live migration") diff --git a/test/e2e/kubevirt/types.go b/test/e2e/kubevirt/types.go index 809dad0acb..3246c87745 100644 --- a/test/e2e/kubevirt/types.go +++ b/test/e2e/kubevirt/types.go @@ -2,7 +2,7 @@ package kubevirt const ( FedoraCoreOSContainerDiskImage = "quay.io/kubevirtci/fedora-coreos-kubevirt:v20230905-be4fa50" - FedoraWithTestToolingContainerDiskImage = "quay.io/kubevirtci/fedora-with-test-tooling:v20241128-4d4c8fe" + FedoraWithTestToolingContainerDiskImage = "quay.io/kubevirtci/fedora-with-test-tooling:v20250416-e37573e" FedoraContainerDiskImage = "quay.io/containerdisks/fedora:39" FakeLauncherImage = "quay.io/nmstate/c10s-nmstate-dev:latest" ) diff --git a/test/e2e/kubevirt/udn.go b/test/e2e/kubevirt/udn.go new file mode 100644 index 0000000000..926040dffe --- /dev/null +++ b/test/e2e/kubevirt/udn.go @@ -0,0 +1,68 @@ +package kubevirt + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// 
GenerateCUDN creates a new ClusterUserDefinedNetwork (CUDN) object with the specified parameters. +// Parameters: +// - namespace: The namespace in which the CUDN will be created. +// - name: The name of the CUDN. +// - topology: The network topology for the CUDN. +// - role: The network role for the CUDN. +// - subnets: The dual-stack CIDRs for the CUDN. +// Returns: +// - A pointer to the created ClusterUserDefinedNetwork object. +// - A string representation of the CUDN's network name. +func GenerateCUDN(namespace, name string, topology udnv1.NetworkTopology, role udnv1.NetworkRole, subnets udnv1.DualStackCIDRs) (*udnv1.ClusterUserDefinedNetwork, string) { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + // Generate a unique name for the CUDN by combining the namespace and name and add + // a label with the same value for easy identification, for example at the RouteAdvertisement + // CUDN selector + Name: namespace + "-" + name, + Labels: map[string]string{ + "name": namespace + "-" + name, + }, + }, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{namespace}, + }}}, + Network: udnv1.NetworkSpec{ + Topology: topology, + }, + }, + } + ipam := &udnv1.IPAMConfig{ + Mode: udnv1.IPAMDisabled, + } + + if len(subnets) > 0 { + ipam.Mode = udnv1.IPAMEnabled + ipam.Lifecycle = udnv1.IPAMLifecyclePersistent + } + + networkName := util.GenerateCUDNNetworkName(cudn.Name) + if topology == udnv1.NetworkTopologyLayer2 { + cudn.Spec.Network.Layer2 = &udnv1.Layer2Config{ + Role: role, + Subnets: subnets, + IPAM: ipam, + } + } else if topology == udnv1.NetworkTopologyLocalnet { + cudn.Spec.Network.Localnet = &udnv1.LocalnetConfig{ + Role: role, + Subnets: subnets, + IPAM: ipam, + PhysicalNetworkName: networkName, + } + } + + return cudn, networkName +} diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go index 056158e599..03649143dd 100644 --- a/test/e2e/localnet-underlay.go +++ b/test/e2e/localnet-underlay.go @@ -6,10 +6,14 @@ import ( "os" "os/exec" "strings" + "time" + + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" + e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" ) const ( @@ -19,27 +23,28 @@ const ( del = "del-br" ) -func setupUnderlay(ovsPods []v1.Pod, bridgeName, portName string, nadConfig networkAttachmentConfig) error { +func setupUnderlay(ovsPods []v1.Pod, bridgeName, portName, networkName string, vlanID int) error { for _, ovsPod := range ovsPods { if bridgeName != defaultOvsBridge { - if err := addOVSBridge(ovsPod.Name, bridgeName); err != nil { + if err := addOVSBridge(ovsPod.Namespace, ovsPod.Name, bridgeName); err != nil { return err } - if nadConfig.vlanID > 0 { - if err := ovsEnableVLANAccessPort(ovsPod.Name, bridgeName, portName, nadConfig.vlanID); err != nil { + if vlanID > 0 { + if err := ovsEnableVLANAccessPort(ovsPod.Namespace, ovsPod.Name, bridgeName, portName, vlanID); err != nil { return err } } else { - if err := ovsAttachPortToBridge(ovsPod.Name, bridgeName, portName); err != nil { + if err := ovsAttachPortToBridge(ovsPod.Namespace, ovsPod.Name, bridgeName, portName); err != nil { return err } } } if err := configureBridgeMappings( + ovsPod.Namespace, ovsPod.Name, defaultNetworkBridgeMapping(), - 
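For orientation, this is how the kubevirt specs above consume GenerateCUDN; a minimal sketch only, assuming the suite-local createCUDN and generateL2Subnets helpers and the cidrIPv4/cidrIPv6 values that appear elsewhere in this diff:

```go
// Sketch: typical call site for GenerateCUDN inside the kubevirt specs.
// createCUDN, generateL2Subnets, cidrIPv4 and cidrIPv6 are suite-local
// helpers/values referenced elsewhere in this diff, not new API.
cidrs := generateL2Subnets(cidrIPv4, cidrIPv6)
cudn, networkName := kubevirt.GenerateCUDN(namespace, "net1",
	udnv1.NetworkTopologyLayer2, udnv1.NetworkRolePrimary, cidrs)
createCUDN(cudn)
// networkName is util.GenerateCUDNNetworkName(cudn.Name); localnet specs pass
// it to setupUnderlay so the ovn-bridge-mappings entry matches the CUDN's
// PhysicalNetworkName.
_ = networkName
```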
bridgeMapping(nadConfig.networkName, bridgeName), + bridgeMapping(networkName, bridgeName), ); err != nil { return err } @@ -49,11 +54,11 @@ func setupUnderlay(ovsPods []v1.Pod, bridgeName, portName string, nadConfig netw func ovsRemoveSwitchPort(ovsPods []v1.Pod, portName string, newVLANID int) error { for _, ovsPod := range ovsPods { - if err := ovsRemoveVLANAccessPort(ovsPod.Name, secondaryBridge, portName); err != nil { + if err := ovsRemoveVLANAccessPort(ovsPod.Namespace, ovsPod.Name, secondaryBridge, portName); err != nil { return fmt.Errorf("failed to remove old VLAN port: %v", err) } - if err := ovsEnableVLANAccessPort(ovsPod.Name, secondaryBridge, portName, newVLANID); err != nil { + if err := ovsEnableVLANAccessPort(ovsPod.Namespace, ovsPod.Name, secondaryBridge, portName, newVLANID); err != nil { return fmt.Errorf("failed to add new VLAN port: %v", err) } } @@ -64,12 +69,13 @@ func ovsRemoveSwitchPort(ovsPods []v1.Pod, portName string, newVLANID int) error func teardownUnderlay(ovsPods []v1.Pod, bridgeName string) error { for _, ovsPod := range ovsPods { if bridgeName != defaultOvsBridge { - if err := removeOVSBridge(ovsPod.Name, bridgeName); err != nil { + if err := removeOVSBridge(ovsPod.Namespace, ovsPod.Name, bridgeName); err != nil { return err } } // restore default bridge mapping if err := configureBridgeMappings( + ovsPod.Namespace, ovsPod.Name, defaultNetworkBridgeMapping(), ); err != nil { @@ -83,7 +89,7 @@ func ovsPods(clientSet clientset.Interface) []v1.Pod { const ( ovsNodeLabel = "app=ovs-node" ) - pods, err := clientSet.CoreV1().Pods(ovnNamespace).List( + pods, err := clientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List( context.Background(), metav1.ListOptions{LabelSelector: ovsNodeLabel}, ) @@ -93,63 +99,49 @@ func ovsPods(clientSet clientset.Interface) []v1.Pod { return pods.Items } -func addOVSBridge(ovnNodeName string, bridgeName string) error { - _, err := runCommand(ovsBridgeCommand(ovnNodeName, add, bridgeName)...) - if err != nil { - return fmt.Errorf("failed to ADD OVS bridge %s: %v", bridgeName, err) +func addOVSBridge(podNamespace, podName string, bridgeName string) error { + cmd := strings.Join([]string{"ovs-vsctl", add, bridgeName}, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { + return fmt.Errorf("failed to add ovs bridge %q: %v", bridgeName, err) } return nil } -func removeOVSBridge(ovnNodeName string, bridgeName string) error { - _, err := runCommand(ovsBridgeCommand(ovnNodeName, del, bridgeName)...) 
- if err != nil { - return fmt.Errorf("failed to DELETE OVS bridge %s: %v", bridgeName, err) +func removeOVSBridge(podNamespace, podName string, bridgeName string) error { + cmd := strings.Join([]string{"ovs-vsctl", del, bridgeName}, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { + return fmt.Errorf("failed to remove ovs bridge %q: %v", bridgeName, err) } return nil } -func ovsBridgeCommand(ovnNodeName string, addOrDeleteCmd string, bridgeName string) []string { - return []string{ - "kubectl", "-n", ovnNamespace, "exec", ovnNodeName, "--", - "ovs-vsctl", addOrDeleteCmd, bridgeName, - } -} - -func ovsAttachPortToBridge(ovsNodeName string, bridgeName string, portName string) error { - cmd := []string{ - "kubectl", "-n", ovnNamespace, "exec", ovsNodeName, "--", +func ovsAttachPortToBridge(podNamespace, podName string, bridgeName string, portName string) error { + cmd := strings.Join([]string{ "ovs-vsctl", "add-port", bridgeName, portName, + }, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { + return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) } - if _, err := runCommand(cmd...); err != nil { - return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) - } - return nil } -func ovsEnableVLANAccessPort(ovsNodeName string, bridgeName string, portName string, vlanID int) error { - cmd := []string{ - "kubectl", "-n", ovnNamespace, "exec", ovsNodeName, "--", - "ovs-vsctl", "--may-exist", "add-port", bridgeName, portName, fmt.Sprintf("tag=%d", vlanID), "vlan_mode=access", - } - if _, err := runCommand(cmd...); err != nil { - return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) +func ovsEnableVLANAccessPort(podNamespace, podName string, bridgeName string, portName string, vlanID int) error { + cmd := strings.Join([]string{ + "ovs-vsctl", "add-port", bridgeName, portName, fmt.Sprintf("tag=%d", vlanID), "vlan_mode=access", + }, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { + return fmt.Errorf("failed to add VLAN access port %s to OVS bridge %s: %v", portName, bridgeName, err) } - return nil } -func ovsRemoveVLANAccessPort(ovsNodeName string, bridgeName string, portName string) error { - cmd := []string{ - "kubectl", "-n", ovnNamespace, "exec", ovsNodeName, "--", +func ovsRemoveVLANAccessPort(podNamespace, podName string, bridgeName string, portName string) error { + cmd := strings.Join([]string{ "ovs-vsctl", "del-port", bridgeName, portName, - } - - if _, err := runCommand(cmd...); err != nil { + }, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { return fmt.Errorf("failed to remove port %s from OVS bridge %s: %v", portName, bridgeName, err) } - return nil } @@ -176,13 +168,13 @@ func Map[T, V any](items []T, fn func(T) V) []V { return result } -func configureBridgeMappings(ovnNodeName string, mappings ...BridgeMapping) error { +func configureBridgeMappings(podNamespace, podName string, mappings ...BridgeMapping) error { mappingsString := fmt.Sprintf("external_ids:ovn-bridge-mappings=%s", BridgeMappings(mappings).String()) - cmd := []string{"kubectl", "-n", ovnNamespace, "exec", ovnNodeName, - "--", "ovs-vsctl", "set", "open", ".", mappingsString, + cmd := strings.Join([]string{"ovs-vsctl",
"set", "open", ".", mappingsString}, " ") + if _, err := e2epodoutput.RunHostCmdWithRetries(podNamespace, podName, cmd, time.Second, time.Second*5); err != nil { + return fmt.Errorf("failed to configure bridge mappings %q: %v", mappingsString, err) } - _, err := runCommand(cmd...) - return err + return nil } func defaultNetworkBridgeMapping() BridgeMapping { diff --git a/test/e2e/multi_node_zones_interconnect.go b/test/e2e/multi_node_zones_interconnect.go index 492a67b55a..5737ec3680 100644 --- a/test/e2e/multi_node_zones_interconnect.go +++ b/test/e2e/multi_node_zones_interconnect.go @@ -8,6 +8,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -40,8 +41,8 @@ func changeNodeZone(node *v1.Node, zone string, cs clientset.Interface) error { framework.ExpectNoError(err) // Restart the ovnkube-node on this node - err = restartOVNKubeNodePod(cs, ovnNamespace, node.Name) - framework.ExpectNoError(err) + err = restartOVNKubeNodePod(cs, deploymentconfig.Get().OVNKubernetesNamespace(), node.Name) + framework.ExpectNoError(err, "must get OVN-Kubernetes deployment config for Node %s and namespace %s", node.Name, deploymentconfig.Get().OVNKubernetesNamespace()) // Verify that the node is moved to the expected zone err = wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) { diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 88b15568d8..49884ab548 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -21,6 +21,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" mnpclient "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1beta1" @@ -301,7 +302,7 @@ var _ = Describe("Multi Homing", func() { By("tearing down the localnet underlay") Expect(teardownUnderlay(pods, defaultOvsBridge)).To(Succeed()) }() - Expect(setupUnderlay(pods, defaultOvsBridge, "", netConfig)).To(Succeed()) + Expect(setupUnderlay(pods, defaultOvsBridge, "", netConfig.networkName, netConfig.vlanID)).To(Succeed()) nad := generateNAD(netConfig) By(fmt.Sprintf("creating the attachment configuration: %v\n", nad)) @@ -394,10 +395,11 @@ var _ = Describe("Multi Homing", func() { Context("multiple pods connected to the same OVN-K secondary network", func() { const ( - clientPodName = "client-pod" - port = 9000 workerOneNodeName = "ovn-worker" workerTwoNodeName = "ovn-worker2" + clientPodName = "client-pod" + nodeHostnameKey = "kubernetes.io/hostname" + port = 9000 clientIP = "192.168.200.10/24" staticServerIP = "192.168.200.20/24" ) @@ -490,7 +492,7 @@ var _ = Describe("Multi Homing", func() { }() const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed()) } By("creating the attachment configuration") @@ -501,6 +503,12 @@ var _ = Describe("Multi Homing", func() { ) Expect(err).NotTo(HaveOccurred()) + By("Get two scheduable nodes and schedule client and server to be on distinct Nodes") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), 
f.ClientSet, 2) + framework.ExpectNoError(err, "2 scheduable nodes are required") + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].GetName()} + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[1].GetName()} + By("instantiating the server pod") serverPod, err := cs.CoreV1().Pods(serverPodConfig.namespace).Create( context.Background(), @@ -577,15 +585,13 @@ var _ = Describe("Multi Homing", func() { cidr: secondaryNetworkCIDR, }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -648,15 +654,13 @@ var _ = Describe("Multi Homing", func() { cidr: strings.Join([]string{netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), netCIDR(secondaryIPv6CIDR, netPrefixLengthIPv6PerNode)}, ","), }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -707,15 +711,13 @@ var _ = Describe("Multi Homing", func() { cidr: secondaryIPv6CIDR, }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -726,15 +728,13 @@ var _ = Describe("Multi Homing", func() { cidr: strings.Join([]string{secondaryFlatL2NetworkCIDR, secondaryIPv6CIDR}, ","), }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -746,15 +746,13 @@ var _ = Describe("Multi Homing", func() { vlanID: localnetVLANID, }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: 
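The multihoming and network-segmentation specs replace the hard-coded ovn-worker/ovn-worker2 node names with this lookup. A condensed sketch of the pattern, using only upstream e2e framework helpers; note that GetBoundedReadySchedulableNodes caps the result at N but may return fewer, hence the explicit guard:

```go
// Sketch: pin client and server pods to two distinct ready nodes instead of
// hard-coding kind node names.
nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), f.ClientSet, 2)
framework.ExpectNoError(err, "at least 2 schedulable nodes are required")
if len(nodes.Items) < 2 {
	ginkgo.Skip("requires at least 2 schedulable Nodes")
}
clientPodConfig.nodeSelector = map[string]string{"kubernetes.io/hostname": nodes.Items[0].GetName()}
serverPodConfig.nodeSelector = map[string]string{"kubernetes.io/hostname": nodes.Items[1].GetName()}
```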
[]nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -767,14 +765,12 @@ var _ = Describe("Multi Homing", func() { podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, isPrivileged: true, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, isPrivileged: true, }, ), @@ -790,8 +786,7 @@ var _ = Describe("Multi Homing", func() { Name: secondaryNetworkName, IPRequest: []string{clientIP}, }}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{ @@ -800,7 +795,6 @@ var _ = Describe("Multi Homing", func() { }}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -812,15 +806,13 @@ var _ = Describe("Multi Homing", func() { vlanID: localnetVLANID, }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ginkgo.Entry( @@ -832,28 +824,26 @@ var _ = Describe("Multi Homing", func() { vlanID: localnetVLANID, }, podConfiguration{ - attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, - name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: podName, containerCmd: httpServerContainerCmd(port), - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), ) Context("localnet OVN-K secondary network", func() { const ( - clientPodName = "client-pod" - nodeHostnameKey = "kubernetes.io/hostname" - servicePort = 9000 - dockerNetworkName = "underlay" - underlayServiceIP = "60.128.0.1" - secondaryInterfaceName = "eth1" - expectedOriginalMTU = 1200 + clientPodName = "client-pod" + nodeHostnameKey = "kubernetes.io/hostname" + servicePort uint16 = 9000 + dockerNetworkName = "underlay" + underlayServiceIP = "60.128.0.1" + secondaryInterfaceName = "eth1" + expectedOriginalMTU = 1200 ) var netConfig networkAttachmentConfig @@ -878,7 +868,7 @@ var _ = Describe("Multi Homing", func() { By("setting up the localnet underlay") nodes = ovsPods(cs) Expect(nodes).NotTo(BeEmpty()) - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed()) }) BeforeEach(func() { @@ -900,7 +890,7 @@ var _ = Describe("Multi Homing", func() { 
BeforeEach(func() { By("starting a service, connected to the underlay") - cmdWebServer = exec.Command("python3", "-m", "http.server", "--bind", underlayServiceIP, strconv.Itoa(servicePort)) + cmdWebServer = exec.Command("python3", "-m", "http.server", "--bind", underlayServiceIP, strconv.Itoa(int(servicePort))) cmdWebServer.Stderr = os.Stderr Expect(cmdWebServer.Start()).NotTo(HaveOccurred(), "failed to create web server, port might be busy") }) @@ -1264,7 +1254,7 @@ var _ = Describe("Multi Homing", func() { }) By("setting up the localnet underlay with a trunked configuration") - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed(), "configuring the OVS bridge") + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed(), "configuring the OVS bridge") By(fmt.Sprintf("creating a VLAN interface on top of the bridge connecting the cluster nodes with IP: %s", underlayIP)) cli, err := client.NewClientWithOpts(client.FromEnv) @@ -1377,7 +1367,7 @@ var _ = Describe("Multi Homing", func() { }() const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed()) } Expect(createNads(f, nadClient, extraNamespace, netConfig)).NotTo(HaveOccurred()) @@ -1803,7 +1793,7 @@ var _ = Describe("Multi Homing", func() { Expect(teardownUnderlay(nodes, secondaryBridge)).To(Succeed()) }() const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed()) Expect(createNads(f, nadClient, extraNamespace, netConfig)).NotTo(HaveOccurred()) @@ -1935,7 +1925,7 @@ var _ = Describe("Multi Homing", func() { Expect(teardownUnderlay(nodes, secondaryBridge)).To(Succeed()) }() const secondaryInterfaceName = "eth1" - Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig)).To(Succeed()) + Expect(setupUnderlay(nodes, secondaryBridge, secondaryInterfaceName, netConfig.networkName, netConfig.vlanID)).To(Succeed()) Expect(createNads(f, nadClient, extraNamespace, netConfig)).NotTo(HaveOccurred()) @@ -2238,7 +2228,7 @@ func setBridgeMappings(cs clientset.Interface, mappings ...BridgeMapping) error } for _, pods := range pods { - if err := configureBridgeMappings(pods.Name, mappings...); err != nil { + if err := configureBridgeMappings(pods.Namespace, pods.Name, mappings...); err != nil { return err } } diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index 1c0d1a7435..dea72dce28 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -226,12 +226,12 @@ func podNetworkStatus(pod *v1.Pod, predicates ...func(nadapi.NetworkStatus) bool return netStatusMeetingPredicates, nil } -func podNetworkStatusByNetConfigPredicate(netConfig networkAttachmentConfig) func(nadapi.NetworkStatus) bool { +func podNetworkStatusByNetConfigPredicate(namespace, name, role string) func(nadapi.NetworkStatus) bool { return func(networkStatus nadapi.NetworkStatus) bool { - if netConfig.role == "primary" { + if role == "primary" { return networkStatus.Default } else { - return networkStatus.Name == netConfig.namespace+"/"+netConfig.name + return networkStatus.Name == namespace+"/"+name } } } @@ -253,7 
+253,7 @@ func inRange(cidr string, ip string) error { return fmt.Errorf("ip [%s] is NOT in range %s", ip, cidr) } -func connectToServer(clientPodConfig podConfiguration, serverIP string, port int) error { +func connectToServer(clientPodConfig podConfiguration, serverIP string, port uint16) error { _, err := e2ekubectl.RunKubectl( clientPodConfig.namespace, "exec", @@ -610,7 +610,7 @@ func allowedTCPPortsForPolicy(allowPorts ...int) []mnpapi.MultiNetworkPolicyPort return portAllowlist } -func reachServerPodFromClient(cs clientset.Interface, serverConfig podConfiguration, clientConfig podConfiguration, serverIP string, serverPort int) error { +func reachServerPodFromClient(cs clientset.Interface, serverConfig podConfiguration, clientConfig podConfiguration, serverIP string, serverPort uint16) error { updatedPod, err := cs.CoreV1().Pods(serverConfig.namespace).Get(context.Background(), serverConfig.name, metav1.GetOptions{}) if err != nil { return err } diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 12d04604af..a3105f2ab0 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -10,6 +10,11 @@ import ( "strings" "time" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" "github.com/onsi/ginkgo/v2" @@ -50,13 +55,14 @@ var _ = Describe("Network Segmentation", func() { ) const ( - nodeHostnameKey = "kubernetes.io/hostname" - port = 9000 - defaultPort = 8080 - userDefinedNetworkIPv4Subnet = "10.128.0.0/16" - userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" - userDefinedNetworkName = "hogwarts" - nadName = "gryffindor" + port = 9000 + nodeHostnameKey = "kubernetes.io/hostname" + podClusterNetPort uint16 = 9000 + podClusterNetDefaultPort uint16 = 8080 + userDefinedNetworkIPv4Subnet = "10.128.0.0/16" + userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" + userDefinedNetworkName = "hogwarts" + nadName = "gryffindor" ) BeforeEach(func() { @@ -140,19 +146,24 @@ var _ = Describe("Network Segmentation", func() { clientPodConfig podConfiguration, serverPodConfig podConfiguration, ) { + By("ensure 2 schedulable Nodes") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) + framework.ExpectNoError(err) + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + node1Name, node2Name := nodes.Items[0].GetName(), nodes.Items[1].GetName() + By("creating the network") netConfig.namespace = f.Namespace.Name Expect(createNetworkFn(netConfig)).To(Succeed()) - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), cs, 2) - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodes.Items)).To(BeNumerically(">=", 2), "must be at least 2 Nodes to schedule pods") - By("creating client/server pods") serverPodConfig.namespace = f.Namespace.Name - serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].Name} + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node1Name} clientPodConfig.namespace = f.Namespace.Name - clientPodConfig.nodeSelector =
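With the port constants now typed uint16, call sites that feed standard-library or apimachinery APIs need explicit conversions; that is all the strconv.Itoa(int(...)) and intstr.FromInt(int(...)) churn in this file amounts to. A standalone sketch:

```go
package main

import (
	"fmt"
	"net"
	"strconv"
)

func main() {
	const podClusterNetPort uint16 = 9000

	// strconv and net.JoinHostPort want int/string, so the uint16 constants
	// are converted at the call site, mirroring the changes in this diff.
	fmt.Println(net.JoinHostPort("10.128.0.5", strconv.Itoa(int(podClusterNetPort))))

	// fmt's %d verb accepts uint16 directly, which is why the Sprintf-based
	// call sites only needed the constant renamed, not cast.
	fmt.Printf("--http-port=%d\n", podClusterNetPort)
}
```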
map[string]string{nodeHostnameKey: nodes.Items[1].Name} + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node2Name} + runUDNPod(cs, f.Namespace.Name, serverPodConfig, nil) runUDNPod(cs, f.Namespace.Name, clientPodConfig, nil) @@ -177,7 +188,7 @@ var _ = Describe("Network Segmentation", func() { By("asserting the *client* pod can contact the server pod exposed endpoint") Eventually(func() error { - return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, podClusterNetPort) }, 2*time.Minute, 6*time.Second).Should(Succeed()) } }, @@ -195,7 +206,7 @@ var _ = Describe("Network Segmentation", func() { *podConfig( "server-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -213,7 +224,7 @@ var _ = Describe("Network Segmentation", func() { *podConfig( "server-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -232,9 +243,18 @@ var _ = Describe("Network Segmentation", func() { ) } + By("ensure enough schedable nodes exist") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), cs, 1) + Expect(err).NotTo(HaveOccurred()) + if len(nodes.Items) < 1 { + framework.Failf("expect at least one Node: %v", err) + } + nodeName := nodes.Items[0].Name + udnPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodeName} + By("Creating second namespace for default network pods") defaultNetNamespace := f.Namespace.Name + "-default" - _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: defaultNetNamespace, }, @@ -249,10 +269,6 @@ var _ = Describe("Network Segmentation", func() { netConfigParams.namespace = f.Namespace.Name Expect(createNetworkFn(netConfigParams)).To(Succeed()) - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), cs, 1) - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodes.Items)).To(BeNumerically(">=", 1), "must be at least one Node to schedule pods") - nodeName := nodes.Items[0].Name udnPodConfig.namespace = f.Namespace.Name udnPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].Name} @@ -261,7 +277,7 @@ var _ = Describe("Network Segmentation", func() { ProbeHandler: v1.ProbeHandler{ HTTPGet: &v1.HTTPGetAction{ Path: "/healthz", - Port: intstr.FromInt32(port), + Port: intstr.FromInt(int(podClusterNetPort)), }, }, InitialDelaySeconds: 1, @@ -272,7 +288,7 @@ var _ = Describe("Network Segmentation", func() { ProbeHandler: v1.ProbeHandler{ HTTPGet: &v1.HTTPGetAction{ Path: "/healthz", - Port: intstr.FromInt32(port), + Port: intstr.FromInt(int(podClusterNetPort)), }, }, InitialDelaySeconds: 1, @@ -283,7 +299,7 @@ var _ = Describe("Network Segmentation", func() { ProbeHandler: v1.ProbeHandler{ HTTPGet: &v1.HTTPGetAction{ Path: "/healthz", - Port: intstr.FromInt32(port), + Port: intstr.FromInt(int(podClusterNetPort)), }, }, InitialDelaySeconds: 1, @@ -321,7 +337,7 @@ var _ = Describe("Network Segmentation", func() { // positive case for UDN pod is a successful healthcheck, checked later By("checking the default network pod can't reach UDN pod on IP " + destIP) Consistently(func() bool { - return connectToServer(podConfiguration{namespace: defaultPod.Namespace, name: defaultPod.Name}, 
destIP, port) != nil + return connectToServer(podConfiguration{namespace: defaultPod.Namespace, name: defaultPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) } @@ -338,11 +354,11 @@ var _ = Describe("Network Segmentation", func() { } By("checking the default network client pod can reach default pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, defaultPort) == nil + return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, podClusterNetDefaultPort) == nil }).Should(BeTrue()) By("checking the UDN pod can't reach the default network pod on IP " + destIP) Consistently(func() bool { - return connectToServer(udnPodConfig, destIP, defaultPort) != nil + return connectToServer(udnPodConfig, destIP, podClusterNetDefaultPort) != nil }, 5*time.Second).Should(BeTrue()) } @@ -358,8 +374,7 @@ var _ = Describe("Network Segmentation", func() { Expect(udnPod.Status.ContainerStatuses[0].RestartCount).To(Equal(int32(0))) By("restarting kubelet, pod should stay ready") - _, err = runCommand(containerRuntime, "exec", nodeName, - "systemctl", "restart", "kubelet") + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"systemctl", "restart", "kubelet"}) Expect(err).NotTo(HaveOccurred()) By("asserting healthcheck still works (kubelet can access the UDN pod)") @@ -384,13 +399,14 @@ var _ = Describe("Network Segmentation", func() { } By("checking the default network hostNetwork can reach default pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, defaultPort) == nil + return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, podClusterNetDefaultPort) == nil }).Should(BeTrue()) By("checking the non-kubelet host process can reach default pod on IP " + destIP) Eventually(func() bool { - _, err = runCommand(containerRuntime, "exec", nodeName, + _, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{ "curl", "--connect-timeout", "2", - net.JoinHostPort(destIP, fmt.Sprintf("%d", defaultPort))) + net.JoinHostPort(destIP, fmt.Sprintf("%d", podClusterNetDefaultPort)), + }) return err == nil }).Should(BeTrue()) } @@ -402,14 +418,15 @@ var _ = Describe("Network Segmentation", func() { By("checking the default network hostNetwork pod can't reach UDN pod on IP " + destIP) Consistently(func() bool { - return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, port) != nil + return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) By("checking the non-kubelet host process can't reach UDN pod on IP " + destIP) Consistently(func() bool { - _, err = runCommand(containerRuntime, "exec", nodeName, + _, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{ "curl", "--connect-timeout", "2", - net.JoinHostPort(destIP, fmt.Sprintf("%d", port))) + net.JoinHostPort(destIP, fmt.Sprintf("%d", podClusterNetPort)), + }) return err != nil }, 5*time.Second).Should(BeTrue()) } @@ -428,6 +445,9 @@ var _ = Describe("Network Segmentation", func() { "2", "--insecure", "https://kubernetes.default/healthz") + if err != nil { + framework.Logf("connecting to kapi service failed: %v", err) + } return err == nil }, 
5*time.Second).Should(BeTrue()) By("asserting UDN pod can't reach host via default network interface") @@ -481,7 +501,7 @@ var _ = Describe("Network Segmentation", func() { *podConfig( "udn-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -496,7 +516,7 @@ var _ = Describe("Network Segmentation", func() { *podConfig( "udn-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -517,11 +537,12 @@ var _ = Describe("Network Segmentation", func() { namespaceRed := f.Namespace.Name + "-" + red namespaceBlue := f.Namespace.Name + "-" + blue - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) - framework.ExpectNoError(err) - - node1Name := nodes.Items[0].Name - node2Name := nodes.Items[1].Name + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), f.ClientSet, 2) + framework.ExpectNoError(err, "two scheduable nodes are required") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + node1Name, node2Name := nodes.Items[0].GetName(), nodes.Items[1].GetName() for _, namespace := range []string{namespaceRed, namespaceBlue} { By("Creating namespace " + namespace) @@ -579,9 +600,7 @@ var _ = Describe("Network Segmentation", func() { //ensure testing accross nodes if i%2 == 0 { podConfig.nodeSelector = map[string]string{nodeHostnameKey: node1Name} - } else { - podConfig.nodeSelector = map[string]string{nodeHostnameKey: node2Name} } By("creating pod " + podConfig.name + " in " + podConfig.namespace) @@ -706,19 +725,19 @@ var _ = Describe("Network Segmentation", func() { } nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) framework.ExpectNoError(err) + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } node1Name, node2Name := nodes.Items[0].Name, nodes.Items[1].Name clientPodConfig := *podConfig( "client-pod", - withNodeSelector(map[string]string{nodeHostnameKey: node1Name}), ) serverPodConfig := *podConfig( "server-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), - withNodeSelector(map[string]string{nodeHostnameKey: node2Name}), ) - By("creating second namespace") _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -754,7 +773,9 @@ var _ = Describe("Network Segmentation", func() { By(fmt.Sprintf("creating client/server pods in namespace %s", netConfig2.namespace)) serverPodConfig.namespace = netConfig2.namespace + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node1Name} clientPodConfig.namespace = netConfig2.namespace + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node2Name} runUDNPod(cs, netConfig2.namespace, serverPodConfig, nil) runUDNPod(cs, netConfig2.namespace, clientPodConfig, nil) @@ -776,7 +797,7 @@ var _ = Describe("Network Segmentation", func() { By("asserting the *client* pod can contact the server pod exposed endpoint") Eventually(func() error { - return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, podClusterNetPort) }, 2*time.Minute, 6*time.Second).Should(Succeed()) } } @@ -1435,19 +1456,26 @@ spec: const ( externalContainerName = "ovn-k-egress-test-helper" ) - var externalIpv4, externalIpv6 string + var ( + 
providerCtx infraapi.Context + externalContainer infraapi.ExternalContainer + ) BeforeEach(func() { - externalIpv4, externalIpv6 = createClusterExternalContainer( - externalContainerName, - "registry.k8s.io/e2e-test-images/agnhost:2.45", - runExternalContainerCmd(), - httpServerContainerCmd(port), - ) - - DeferCleanup(func() { - deleteClusterExternalContainer(externalContainerName) - }) + providerCtx = infraprovider.Get().NewTestContext() + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "provider primary network must be available") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainerSpec := infraapi.ExternalContainer{ + Name: externalContainerName, + Image: images.AgnHost(), + Network: providerPrimaryNetwork, + Args: httpServerContainerCmd(uint16(externalContainerPort)), + ExtPort: externalContainerPort, + } + externalContainer, err = providerCtx.CreateExternalContainer(externalContainerSpec) + framework.ExpectNoError(err, "external container must succeed") }) + DescribeTableSubtree("created using", func(createNetworkFn func(c *networkAttachmentConfigParams) error) { @@ -1493,7 +1521,7 @@ spec: Expect(podAnno.Routes).To(HaveLen(expectedNumberOfRoutes(*netConfigParams))) - assertClientExternalConnectivity(clientPodConfig, externalIpv4, externalIpv6, port) + assertClientExternalConnectivity(clientPodConfig, externalContainer.GetIPv4(), externalContainer.GetIPv6(), externalContainer.GetPort()) }, Entry("by one pod over a layer2 network", &networkAttachmentConfigParams{ @@ -1561,16 +1589,20 @@ spec: Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, f.Namespace.Name, testUdnName), 5*time.Second, time.Second).Should(Succeed()) By("create UDN pod") cfg := podConfig(testPodName, withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) })) cfg.namespace = f.Namespace.Name udnPod = runUDNPod(cs, f.Namespace.Name, *cfg, nil) }) It("should react to k8s.ovn.org/open-default-ports annotations changes", func() { - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 1) - framework.ExpectNoError(err) - node1Name := nodes.Items[0].Name + By("ensure enough Nodes are available for scheduling") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), f.ClientSet, 2) + framework.ExpectNoError(err, "two scheduleable Nodes must be available") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + node1Name, node2Name := nodes.Items[0].GetName(), nodes.Items[1].GetName() By("Creating second namespace for default network pod") defaultNetNamespace := f.Namespace.Name + "-default" _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ @@ -1589,7 +1621,7 @@ spec: Expect(err).NotTo(HaveOccurred()) By("creating default network hostNetwork client pod") - hostNetPod, err := createPod(f, "host-net-client-pod", node1Name, + hostNetPod, err := createPod(f, "host-net-client-pod", node2Name, defaultNetNamespace, []string{}, nil, func(pod *v1.Pod) { pod.Spec.HostNetwork = true }) @@ -1602,20 +1634,20 @@ spec: ) Expect(err).NotTo(HaveOccurred()) - By(fmt.Sprintf("verify default network client pod can't access UDN pod on port %d", port)) + By(fmt.Sprintf("verify default network client pod can't access UDN pod on port %d", podClusterNetPort)) for _, destIP := range []string{udnIPv4, udnIPv6} { if destIP == "" { continue } By("checking the default network pod can't reach UDN pod on IP " 
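The agnhost egress helper is now provisioned through the infraprovider abstraction rather than the docker-specific createClusterExternalContainer helper. A sketch of the call pattern, restricted to the fields and accessors that appear in this diff; cleanup is owned by the provider test context:

```go
// Sketch: external helper container via the infraprovider abstraction,
// using only identifiers visible in this diff.
providerCtx := infraprovider.Get().NewTestContext()
primaryNetwork, err := infraprovider.Get().PrimaryNetwork()
framework.ExpectNoError(err, "provider primary network must be available")

extPort := infraprovider.Get().GetExternalContainerPort()
externalContainer, err := providerCtx.CreateExternalContainer(infraapi.ExternalContainer{
	Name:    "ovn-k-egress-test-helper",
	Image:   images.AgnHost(),
	Network: primaryNetwork,
	Args:    httpServerContainerCmd(uint16(extPort)),
	ExtPort: extPort,
})
framework.ExpectNoError(err, "external container must be created")

// The handle exposes the values the connectivity assertions need; the
// provider context tears the container down when the spec finishes.
_ = externalContainer.GetIPv4()
_ = externalContainer.GetIPv6()
_ = externalContainer.GetPort()
```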
+ destIP) Consistently(func() bool { - return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) != nil + return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) if !isUDNHostIsolationDisabled() { By("checking the default hostNetwork pod can't reach UDN pod on IP " + destIP) Consistently(func() bool { - return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, port) != nil + return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) } } @@ -1624,23 +1656,23 @@ spec: udnPod.Annotations[openDefaultPortsAnnotation] = fmt.Sprintf( `- protocol: tcp - port: %d`, port) + port: %d`, podClusterNetPort) udnPod, err = cs.CoreV1().Pods(udnPod.Namespace).Update(context.Background(), udnPod, metav1.UpdateOptions{}) Expect(err).NotTo(HaveOccurred()) - By(fmt.Sprintf("verify default network client pod can access UDN pod on open port %d", port)) + By(fmt.Sprintf("verify default network client pod can access UDN pod on open port %d", podClusterNetPort)) for _, destIP := range []string{udnIPv4, udnIPv6} { if destIP == "" { continue } By("checking the default network pod can reach UDN pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) == nil + return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, podClusterNetPort) == nil }, 5*time.Second).Should(BeTrue()) By("checking the default hostNetwork pod can reach UDN pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, port) == nil + return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, podClusterNetPort) == nil }, 5*time.Second).Should(BeTrue()) } @@ -1648,24 +1680,24 @@ spec: // this should clean up open ports and throw an event udnPod.Annotations[openDefaultPortsAnnotation] = fmt.Sprintf( `- protocol: ppp - port: %d`, port) + port: %d`, podClusterNetPort) udnPod, err = cs.CoreV1().Pods(udnPod.Namespace).Update(context.Background(), udnPod, metav1.UpdateOptions{}) Expect(err).NotTo(HaveOccurred()) - By(fmt.Sprintf("verify default network client pod can't access UDN pod on port %d", port)) + By(fmt.Sprintf("verify default network client pod can't access UDN pod on port %d", podClusterNetPort)) for _, destIP := range []string{udnIPv4, udnIPv6} { if destIP == "" { continue } By("checking the default network pod can't reach UDN pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) != nil + return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) if !isUDNHostIsolationDisabled() { By("checking the default hostNetwork pod can't reach UDN pod on IP " + destIP) Eventually(func() bool { - return connectToServer(podConfiguration{namespace: hostNetPod.Namespace, name: hostNetPod.Name}, destIP, port) != nil + return connectToServer(podConfiguration{namespace: 
hostNetPod.Namespace, name: hostNetPod.Name}, destIP, podClusterNetPort) != nil }, 5*time.Second).Should(BeTrue()) } } @@ -1703,13 +1735,18 @@ spec: Expect(err).ShouldNot(HaveOccurred(), "creating manifest must succeed") DeferCleanup(cleanup) Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, netConfig.namespace, netConfig.name), 5*time.Second, time.Second).Should(Succeed()) + By("ensure two Nodes are available for scheduling") nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), f.ClientSet, 2) Expect(err).ShouldNot(HaveOccurred(), "test requires at least two schedulable nodes") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + node1Name, node2Name := nodes.Items[0].GetName(), nodes.Items[1].GetName() Expect(len(nodes.Items)).Should(BeNumerically(">=", 2), "test requires >= 2 Ready nodes") serverPodConfig.namespace = f.Namespace.Name - serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].Name} + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node1Name} clientPodConfig.namespace = f.Namespace.Name - clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[1].Name} + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: node2Name} runUDNPod(cs, f.Namespace.Name, serverPodConfig, nil) runUDNPod(cs, f.Namespace.Name, clientPodConfig, nil) serverIP, err := podIPsForUserDefinedPrimaryNetwork(cs, f.Namespace.Name, serverPodConfig.name, namespacedName(f.Namespace.Name, netConfig.name), 0) @@ -1719,11 +1756,11 @@ spec: clientPod := getPod(f, clientPodConfig.name) for _, testPod := range []*v1.Pod{clientPod, serverPod} { By(fmt.Sprintf("asserting the server pod IP %v is reachable from client before restart of OVNKube node pod on Node %s", serverIP, testPod.Spec.Hostname)) - Expect(reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port)).ShouldNot(HaveOccurred(), "must have connectivity to server pre OVN Kube node Pod restart") + Expect(reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, podClusterNetPort)).ShouldNot(HaveOccurred(), "must have connectivity to server pre OVN Kube node Pod restart") By(fmt.Sprintf("restarting OVNKube node Pod located on Node %s which hosts test Pod %s/%s", testPod.Spec.NodeName, testPod.Namespace, testPod.Name)) - Expect(restartOVNKubeNodePod(cs, ovnNamespace, testPod.Spec.NodeName)).ShouldNot(HaveOccurred(), "restart of OVNKube node pod must succeed") + Expect(restartOVNKubeNodePod(cs, deploymentconfig.Get().OVNKubernetesNamespace(), testPod.Spec.NodeName)).ShouldNot(HaveOccurred(), "restart of OVNKube node pod must succeed") By(fmt.Sprintf("asserting the server pod IP %v is reachable from client post restart", serverIP)) - Expect(reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port)).ShouldNot(HaveOccurred(), "must have connectivity to server post restart") + Expect(reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, podClusterNetPort)).ShouldNot(HaveOccurred(), "must have connectivity to server post restart") } }, Entry( @@ -1740,7 +1777,7 @@ spec: *podConfig( "server-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -1758,7 +1795,7 @@ spec: *podConfig( "server-pod", withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(podClusterNetPort) }), ), ), @@ -2305,7 +2342,7 @@ func 
connectToServerViaDefaultNetwork(clientPodConfig podConfiguration, serverIP } // assertClientExternalConnectivity checks if the client can connect to an externally created IP outside the cluster -func assertClientExternalConnectivity(clientPodConfig podConfiguration, externalIpv4 string, externalIpv6 string, port int) { +func assertClientExternalConnectivity(clientPodConfig podConfiguration, externalIpv4 string, externalIpv6 string, port uint16) { if isIPv4Supported() { By("asserting the *client* pod can contact the server's v4 IP located outside the cluster") Eventually(func() error { @@ -2321,10 +2358,6 @@ func assertClientExternalConnectivity(clientPodConfig podConfiguration, external } } -func runExternalContainerCmd() []string { - return []string{"--network", "kind"} -} - func expectedNumberOfRoutes(netConfig networkAttachmentConfigParams) int { if netConfig.topology == "layer2" { if isIPv6Supported() && isIPv4Supported() { diff --git a/test/e2e/network_segmentation_endpointslices_mirror.go b/test/e2e/network_segmentation_endpointslices_mirror.go index ae00d5fdec..171073bdae 100644 --- a/test/e2e/network_segmentation_endpointslices_mirror.go +++ b/test/e2e/network_segmentation_endpointslices_mirror.go @@ -9,6 +9,8 @@ import ( . "github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" appsv1 "k8s.io/api/apps/v1" @@ -18,7 +20,6 @@ import ( clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment" - e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" ) @@ -63,10 +64,14 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { replicas := int32(3) By("creating the deployment") - deployment := e2edeployment.NewDeployment("test-deployment", replicas, map[string]string{"app": "test"}, "agnhost", agnhostImage, appsv1.RollingUpdateDeploymentStrategyType) + var port uint16 = 80 + if isHostNetwork { + port = infraprovider.Get().GetK8HostPort() + } + deployment := e2edeployment.NewDeployment("test-deployment", replicas, map[string]string{"app": "test"}, "agnhost", images.AgnHost(), appsv1.RollingUpdateDeploymentStrategyType) deployment.Namespace = f.Namespace.Name deployment.Spec.Template.Spec.HostNetwork = isHostNetwork - deployment.Spec.Template.Spec.Containers[0].Command = e2epod.GenerateScriptCmd("/agnhost netexec --http-port 80") + deployment.Spec.Template.Spec.Containers[0].Command = getAgnHostHTTPPortBindFullCMD(port) _, err := cs.AppsV1().Deployments(f.Namespace.Name).Create(context.Background(), deployment, metav1.CreateOptions{}) framework.ExpectNoError(err, "Failed creating the deployment %v", err) @@ -194,9 +199,9 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { replicas := int32(3) By("creating the deployment") - deployment := e2edeployment.NewDeployment("test-deployment", replicas, map[string]string{"app": "test"}, "agnhost", agnhostImage, appsv1.RollingUpdateDeploymentStrategyType) + deployment := e2edeployment.NewDeployment("test-deployment", replicas, map[string]string{"app": "test"}, "agnhost", images.AgnHost(), appsv1.RollingUpdateDeploymentStrategyType) deployment.Namespace = defaultNetNamespace.Name - 
deployment.Spec.Template.Spec.Containers[0].Command = e2epod.GenerateScriptCmd("/agnhost netexec --http-port 80") + deployment.Spec.Template.Spec.Containers[0].Command = getAgnHostHTTPPortBindFullCMD(80) _, err = cs.AppsV1().Deployments(defaultNetNamespace.Name).Create(context.Background(), deployment, metav1.CreateOptions{}) framework.ExpectNoError(err, "Failed creating the deployment %v", err) diff --git a/test/e2e/network_segmentation_localnet.go b/test/e2e/network_segmentation_localnet.go index ce67c08b26..a6b68db97c 100644 --- a/test/e2e/network_segmentation_localnet.go +++ b/test/e2e/network_segmentation_localnet.go @@ -49,7 +49,7 @@ var _ = Describe("Network Segmentation: Localnet", func() { Expect(teardownUnderlay(ovsPods, ovsBrName)).To(Succeed()) }) c := networkAttachmentConfig{networkAttachmentConfigParams: networkAttachmentConfigParams{networkName: physicalNetworkName, vlanID: vlan}} - Expect(setupUnderlay(ovsPods, ovsBrName, secondaryIfaceName, c)).To(Succeed()) + Expect(setupUnderlay(ovsPods, ovsBrName, secondaryIfaceName, c.networkName, c.vlanID)).To(Succeed()) By("create test namespaces") _, err := f.ClientSet.CoreV1().Namespaces().Create(context.Background(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsRed}}, metav1.CreateOptions{}) diff --git a/test/e2e/network_segmentation_services.go b/test/e2e/network_segmentation_services.go index f0469f0f0e..d580bc190f 100644 --- a/test/e2e/network_segmentation_services.go +++ b/test/e2e/network_segmentation_services.go @@ -11,6 +11,9 @@ import ( nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" kapi "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" @@ -39,7 +42,7 @@ var _ = Describe("Network Segmentation: services", func() { serviceTargetPort = 80 userDefinedNetworkIPv4Subnet = "10.128.0.0/16" userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" - clientContainer = "frr" + clientContainerName = "frr" ) var ( @@ -180,10 +183,11 @@ ips=$(ip -o addr show dev $iface| grep global |awk '{print $4}' | cut -d/ -f1 | checkConnectionToNodePort(f, udnClientPod2, udnService, &nodes.Items[2], "other node", udnServerPod.Name) By("Connect to the UDN service from the UDN client external container") - checkConnectionToLoadBalancersFromExternalContainer(f, clientContainer, udnService, udnServerPod.Name) - checkConnectionToNodePortFromExternalContainer(f, clientContainer, udnService, &nodes.Items[0], "server node", udnServerPod.Name) - checkConnectionToNodePortFromExternalContainer(f, clientContainer, udnService, &nodes.Items[1], "other node", udnServerPod.Name) - checkConnectionToNodePortFromExternalContainer(f, clientContainer, udnService, &nodes.Items[2], "other node", udnServerPod.Name) + externalContainer := infraapi.ExternalContainer{Name: "frr"} + checkConnectionToLoadBalancersFromExternalContainer(f, externalContainer, udnService, udnServerPod.Name) + checkConnectionToNodePortFromExternalContainer(externalContainer, udnService, &nodes.Items[0], "server node", udnServerPod.Name) + checkConnectionToNodePortFromExternalContainer(externalContainer, udnService, &nodes.Items[1], "other node", udnServerPod.Name) + checkConnectionToNodePortFromExternalContainer(externalContainer, udnService, &nodes.Items[2], 
"other node", udnServerPod.Name) // Default network -> UDN // Check that it cannot connect @@ -253,7 +257,7 @@ ips=$(ip -o addr show dev $iface| grep global |awk '{print $4}' | cut -d/ -f1 | // in OVNK in CLBO state https://issues.redhat.com/browse/OCPBUGS-41499 if netConfigParams.topology == "layer3" { // no need to run it for layer 2 as well By("Restart ovnkube-node on one node and verify that the new ovnkube-node pod goes to the running state") - err = restartOVNKubeNodePod(cs, ovnNamespace, clientNode) + err = restartOVNKubeNodePod(cs, deploymentconfig.Get().OVNKubernetesNamespace(), clientNode) Expect(err).NotTo(HaveOccurred()) } }, @@ -468,7 +472,7 @@ func checkConnectionOrNoConnectionToLoadBalancers(f *framework.Framework, client } } -func checkConnectionToNodePortFromExternalContainer(f *framework.Framework, containerName string, service *v1.Service, node *v1.Node, nodeRoleMsg, expectedOutput string) { +func checkConnectionToNodePortFromExternalContainer(externalContainer infraapi.ExternalContainer, service *v1.Service, node *v1.Node, nodeRoleMsg, expectedOutput string) { GinkgoHelper() var err error nodePort := service.Spec.Ports[0].NodePort @@ -477,11 +481,12 @@ func checkConnectionToNodePortFromExternalContainer(f *framework.Framework, cont for nodeIP := range nodeIPs { msg := fmt.Sprintf("Client at external container %s should connect to NodePort service %s/%s on %s:%d (node %s, %s)", - containerName, service.Namespace, service.Name, nodeIP, nodePort, node.Name, nodeRoleMsg) + externalContainer.GetName(), service.Namespace, service.Name, nodeIP, nodePort, node.Name, nodeRoleMsg) By(msg) - cmd := []string{containerRuntime, "exec", containerName, "/bin/bash", "-c", fmt.Sprintf("echo hostname | nc -u -w 1 %s %d", nodeIP, nodePort)} Eventually(func() (string, error) { - return runCommand(cmd...) + return infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{ + "/bin/bash", "-c", fmt.Sprintf("echo hostname | nc -u -w 1 %s %d", nodeIP, nodePort), + }) }). WithTimeout(5*time.Second). WithPolling(200*time.Millisecond). @@ -489,17 +494,18 @@ func checkConnectionToNodePortFromExternalContainer(f *framework.Framework, cont } } -func checkConnectionToLoadBalancersFromExternalContainer(f *framework.Framework, containerName string, service *v1.Service, expectedOutput string) { +func checkConnectionToLoadBalancersFromExternalContainer(f *framework.Framework, externalContainer infraapi.ExternalContainer, service *v1.Service, expectedOutput string) { GinkgoHelper() port := service.Spec.Ports[0].Port for _, lbIngress := range filterLoadBalancerIngressByIPFamily(f, service) { msg := fmt.Sprintf("Client at external container %s should reach service %s/%s on LoadBalancer IP %s port %d", - containerName, service.Namespace, service.Name, lbIngress.IP, port) + externalContainer.GetName(), service.Namespace, service.Name, lbIngress.IP, port) By(msg) - cmd := []string{containerRuntime, "exec", containerName, "/bin/bash", "-c", fmt.Sprintf("echo hostname | nc -u -w 1 %s %d", lbIngress.IP, port)} Eventually(func() (string, error) { - return runCommand(cmd...) + return infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{ + "/bin/bash", "-c", fmt.Sprintf("echo hostname | nc -u -w 1 %s %d", lbIngress.IP, port), + }) }). // It takes some time for the container to receive the dynamic routing WithTimeout(20*time.Second). 
diff --git a/test/e2e/networkqos.go b/test/e2e/networkqos.go new file mode 100644 index 0000000000..5dd65229c6 --- /dev/null +++ b/test/e2e/networkqos.go @@ -0,0 +1,769 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "net" + "os" + "strconv" + "strings" + "time" + + "golang.org/x/sync/errgroup" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = ginkgo.Describe("e2e NetworkQoS validation", func() { + const ( + podImage = "ghcr.io/nicolaka/netshoot:v0.13" + networkQoSYaml = "networkqos.yaml" + nqosSpecName = "nqos-test-spec" + srcPodName = "src-nqos-pod" + tcpdumpIPv4 = "(ip and (ip[1] & 0xfc) >> 2 == %d)" + tcpdumpIPv6 = "(ip6 and (ip6[0:2] & 0xfc0) >> 6 == %d)" + dstPod1Name = "nqos-dst-pod1" + dstPod2Name = "nqos-dst-pod2" + dstPod3Name = "nqos-dst-pod3" + dstPod4Name = "nqos-dst-pod4" + + bandwidthFluctuation = 1.5 + ) + + var ( + skipIpv4 bool + skipIpv6 bool + dstPodNamespace string + dstNode string + dstPod1IPv4 string + dstPod1IPv6 string + dstPod2IPv4 string + dstPod2IPv6 string + dstPod3IPv4 string + dstPod3IPv6 string + dstPod4IPv4 string + dstPod4IPv6 string + nodeIPv4Range string + nodeIPv6Range string + ) + + f := wrappedTestFramework("networkqos") + + waitForNetworkQoSApplied := func(namespace string) { + gomega.Eventually(func() bool { + output, err := e2ekubectl.RunKubectl(namespace, "get", "networkqos", nqosSpecName) + if err != nil { + framework.Failf("could not get the networkqos default in namespace: %s", namespace) + } + return strings.Contains(output, "NetworkQoS Destinations applied") + }, 10*time.Second).Should(gomega.BeTrue(), fmt.Sprintf("expected networkqos in namespace %s to be successfully applied", namespace)) + } + + ginkgo.BeforeEach(func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err) + if len(nodes.Items) < 2 { + framework.Failf("Test requires >= 2 Ready nodes, but there are only %v nodes", len(nodes.Items)) + } + nodeAddresses := map[string]string{} + err = json.Unmarshal([]byte(nodes.Items[0].Annotations["k8s.ovn.org/node-primary-ifaddr"]), &nodeAddresses) + framework.ExpectNoError(err) + if nodeIP, ok := nodeAddresses["ipv4"]; ok { + _, ipnet, _ := net.ParseCIDR(nodeIP) + nodeIPv4Range = ipnet.String() + skipIpv4 = false + } else { + ginkgo.By("Node IPv4 address not found: Will be skipping IPv4 checks in the Networking QoS test") + nodeIPv4Range = "0.0.0.0/0" + skipIpv4 = true + } + if nodeIP, ok := nodeAddresses["ipv6"]; ok { + _, ipnet, _ := net.ParseCIDR(nodeIP) + nodeIPv6Range = ipnet.String() + skipIpv6 = false + } else { + ginkgo.By("Node IPv6 address not found: Will be skipping IPv6 checks in the Networking QoS test") + nodeIPv6Range = "::/0" + skipIpv6 = true + } + if skipIpv4 && skipIpv6 { + framework.Fail("Neither IPv4 nor IPv6 is configured on the node") + } + dstPodNamespace = f.Namespace.Name + "-dest" + // set up dest namespace + dstNs := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: dstPodNamespace, + Labels: map[string]string{ + "app": "nqos-test", + }, + }, + } + _, err = f.ClientSet.CoreV1().Namespaces().Create(context.Background(), dstNs, metav1.CreateOptions{}) + framework.ExpectNoError(err, "Error creating Namespace %v: %v", dstPodNamespace, err) + + _, err = createPod(f, 
srcPodName, nodes.Items[0].Name, f.Namespace.Name, []string{"bash", "-c", "sleep infinity"}, map[string]string{"component": "nqos-test-src"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + dstNode = nodes.Items[1].Name + }) + + ginkgo.DescribeTable("Should have correct DSCP value for overlay traffic when NetworkQoS is applied", + func(skipThisTableEntry *bool, tcpDumpTpl string, dst1IP, dst2IP, dst3IP, dst4IP *string) { + if *skipThisTableEntry { + return + } + dscpValue := 50 + // dest pod without protocol and port + dstPod1, err := createPod(f, dstPod1Name, dstNode, dstPodNamespace, []string{"bash", "-c", "sleep infinity"}, map[string]string{"component": "nqos-test-dst"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "tcpdump") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod1IPv4, dstPod1IPv6 = getPodAddresses(dstPod1) + + // dest pod covered by tcp without port rule + dstPod2, err := createPod(f, dstPod2Name, dstNode, dstPodNamespace, []string{"bash", "-c", "nc -l -p 9090; sleep infinity"}, map[string]string{"component": "nqos-test-tcp"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "nc") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod2IPv4, dstPod2IPv6 = getPodAddresses(dstPod2) + + // dest pod covered by tcp with port rule + dstPod3, err := createPod(f, dstPod3Name, dstNode, dstPodNamespace, []string{"bash", "-c", "python3 -m http.server 80; sleep infinity"}, map[string]string{"component": "nqos-test-web"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "python3") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod3IPv4, dstPod3IPv6 = getPodAddresses(dstPod3) + + // dest pod not covered by networkqos + dstPod4, err := createPod(f, dstPod4Name, dstNode, dstPodNamespace, []string{"bash", "-c", "sleep infinity"}, nil, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "tcpdump") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod4IPv4, dstPod4IPv6 = getPodAddresses(dstPod4) + + // no dscp (dscp == 0) should be deteced before networkqos is applied + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod1Name, *dst1IP, tcpDumpTpl, 0) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod2Name, *dst2IP, tcpDumpTpl, 9090, 0) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod3Name, *dst3IP, tcpDumpTpl, 80, 0) + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod4Name, *dst4IP, tcpDumpTpl, 0) + + // apply networkqos spec + networkQoSSpec := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1alpha1 +kind: NetworkQoS +metadata: + namespace: %s + name: %s +spec: + podSelector: + matchLabels: + component: nqos-test-src + priority: 50 + egress: + - dscp: %d + classifier: + to: + - podSelector: + 
matchLabels: + component: nqos-test-dst + namespaceSelector: + matchLabels: + app: nqos-test + - dscp: %d + classifier: + ports: + - protocol: TCP + to: + - podSelector: + matchLabels: + component: nqos-test-tcp + namespaceSelector: + matchLabels: + app: nqos-test + - dscp: %d + classifier: + ports: + - protocol: TCP + port: 80 + to: + - podSelector: + matchLabels: + component: nqos-test-web + namespaceSelector: + matchLabels: + app: nqos-test +`, f.Namespace.Name, nqosSpecName, dscpValue, dscpValue+1, dscpValue+2) + if err := os.WriteFile(networkQoSYaml, []byte(networkQoSSpec), 0644); err != nil { + framework.Failf("Unable to write CRD to disk: %v", err) + } + defer func() { + if err := os.Remove(networkQoSYaml); err != nil { + framework.Logf("Unable to remove the CRD file from disk: %v", err) + } + }() + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", networkQoSYaml) + framework.Logf("NetworkQoS applied") + waitForNetworkQoSApplied(f.Namespace.Name) + // verify dscp + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod1Name, *dst1IP, tcpDumpTpl, dscpValue) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod2Name, *dst2IP, tcpDumpTpl, 9090, dscpValue+1) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod3Name, *dst3IP, tcpDumpTpl, 80, dscpValue+2) + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod4Name, *dst4IP, tcpDumpTpl, 0) + }, + ginkgo.Entry("ipv4", &skipIpv4, tcpdumpIPv4, &dstPod1IPv4, &dstPod2IPv4, &dstPod3IPv4, &dstPod4IPv4), + ginkgo.Entry("ipv6", &skipIpv6, tcpdumpIPv6, &dstPod1IPv6, &dstPod2IPv6, &dstPod3IPv6, &dstPod4IPv6), + ) + + ginkgo.DescribeTable("Should have correct DSCP value for host network traffic when NetworkQoS is applied", + func(skipThisTableEntry *bool, tcpDumpTpl string, dst1IP, dst2IP, dst3IP, dst4IP *string) { + if *skipThisTableEntry { + return + } + dscpValue := 32 + // dest pod to test traffic without protocol and port + dstPod1, err := createPod(f, dstPod1Name, dstNode, dstPodNamespace, []string{"bash", "-c", "sleep infinity"}, nil, func(p *corev1.Pod) { + p.Spec.HostNetwork = true + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "tcpdump") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod1IPv4, dstPod1IPv6 = getPodAddresses(dstPod1) + + // dest pod to test traffic with tcp protocol but no port + dstPod2, err := createPod(f, dstPod2Name, dstNode, dstPodNamespace, []string{"bash", "-c", "nc -l -p 9090; sleep infinity"}, nil, func(p *corev1.Pod) { + p.Spec.HostNetwork = true + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "nc") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod2IPv4, dstPod2IPv6 = getPodAddresses(dstPod2) + + // dest pod to test traffic with tcp protocol and port + dstPod3, err := createPod(f, dstPod3Name, dstNode, dstPodNamespace, []string{"bash", "-c", "python3 -m http.server 80; sleep infinity"}, nil, func(p *corev1.Pod) { + p.Spec.HostNetwork = true + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "python3") + return err + + }, 60*time.Second, 
1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod3IPv4, dstPod3IPv6 = getPodAddresses(dstPod3) + + // dest pod not covered by networkqos + dstPod4, err := createPod(f, dstPod4Name, dstNode, dstPodNamespace, []string{"bash", "-c", "sleep infinity"}, nil, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "tcpdump") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod4IPv4, dstPod4IPv6 = getPodAddresses(dstPod4) + + // no dscp (dscp == 0) should be deteced before networkqos is applied + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod1Name, *dst1IP, tcpDumpTpl, 0) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod2Name, *dst2IP, tcpDumpTpl, 9090, 0) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod3Name, *dst3IP, tcpDumpTpl, 80, 0) + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod4Name, *dst4IP, tcpDumpTpl, 0) + + // apply networkqos spec + networkQoSSpec := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1alpha1 +kind: NetworkQoS +metadata: + namespace: %s + name: %s +spec: + podSelector: + matchLabels: + component: nqos-test-src + priority: 51 + egress: + - dscp: %d + classifier: + to: + - ipBlock: + cidr: %s + - ipBlock: + cidr: %s + - dscp: %d + classifier: + ports: + - protocol: TCP + to: + - ipBlock: + cidr: %s + - ipBlock: + cidr: %s + - dscp: %d + classifier: + ports: + - protocol: TCP + port: 80 + to: + - ipBlock: + cidr: %s + - ipBlock: + cidr: %s +`, f.Namespace.Name, nqosSpecName, dscpValue, nodeIPv4Range, nodeIPv6Range, dscpValue+1, nodeIPv4Range, nodeIPv6Range, dscpValue+2, nodeIPv4Range, nodeIPv6Range) + if err := os.WriteFile(networkQoSYaml, []byte(networkQoSSpec), 0644); err != nil { + framework.Failf("Unable to write CRD to disk: %v", err) + } + defer func() { + if err := os.Remove(networkQoSYaml); err != nil { + framework.Logf("Unable to remove the CRD file from disk: %v", err) + } + }() + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", networkQoSYaml) + framework.Logf("NetworkQoS applied") + waitForNetworkQoSApplied(f.Namespace.Name) + // verify dscp + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod1Name, *dst1IP, tcpDumpTpl, dscpValue) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod2Name, *dst2IP, tcpDumpTpl, 9090, dscpValue+1) + netcatExpectDscp(f, srcPodName, dstPodNamespace, dstPod3Name, *dst3IP, tcpDumpTpl, 80, dscpValue+2) + pingExpectDscp(f, srcPodName, dstPodNamespace, dstPod4Name, *dst4IP, tcpDumpTpl, 0) + }, + ginkgo.Entry("ipv4", &skipIpv4, tcpdumpIPv4, &dstPod1IPv4, &dstPod2IPv4, &dstPod3IPv4, &dstPod4IPv4), + ginkgo.Entry("ipv6", &skipIpv6, tcpdumpIPv6, &dstPod1IPv6, &dstPod2IPv6, &dstPod3IPv6, &dstPod4IPv6), + ) + + ginkgo.DescribeTable("Limits egress traffic to all target pods below the specified rate in NetworkQoS spec", + func(skipThisTableEntry *bool, dst1IP, dst2IP *string) { + if *skipThisTableEntry { + return + } + rate := 10000 + // dest pod 1 for test without protocol & port + dstPod1, err := createPod(f, dstPod1Name, dstNode, dstPodNamespace, []string{"bash", "-c", "iperf3 -s"}, map[string]string{"component": "nqos-test-dst"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "iperf3") + return err + + }, 60*time.Second, 
1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod1IPv4, dstPod1IPv6 = getPodAddresses(dstPod1) + // dest pod 2 for test without protocol & port + dstPod2, err := createPod(f, dstPod2Name, dstNode, dstPodNamespace, []string{"bash", "-c", "iperf3 -s"}, map[string]string{"component": "nqos-test-dst"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "iperf3") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod2IPv4, dstPod2IPv6 = getPodAddresses(dstPod2) + + bps := twoStreamIperf3Tests(f, srcPodName, *dst1IP, *dst2IP, 5201) + gomega.Expect(bps/1000 > float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + + // apply networkqos spec + networkQoSSpec := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1alpha1 +kind: NetworkQoS +metadata: + namespace: %s + name: %s +spec: + podSelector: + matchLabels: + component: nqos-test-src + priority: 52 + egress: + - dscp: 1 + bandwidth: + rate: %d + classifier: + to: + - podSelector: + matchLabels: + component: nqos-test-dst + namespaceSelector: + matchLabels: + app: nqos-test +`, f.Namespace.Name, nqosSpecName, rate) + if err := os.WriteFile(networkQoSYaml, []byte(networkQoSSpec), 0644); err != nil { + framework.Failf("Unable to write CRD to disk: %v", err) + } + defer func() { + if err := os.Remove(networkQoSYaml); err != nil { + framework.Logf("Unable to remove the CRD file from disk: %v", err) + } + }() + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", networkQoSYaml) + framework.Logf("NetworkQoS applied") + waitForNetworkQoSApplied(f.Namespace.Name) + bps = twoStreamIperf3Tests(f, srcPodName, *dst1IP, *dst2IP, 5201) + gomega.Expect(bps/1000 <= float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + }, + ginkgo.Entry("ipv4", &skipIpv4, &dstPod1IPv4, &dstPod2IPv4), + ginkgo.Entry("ipv6", &skipIpv6, &dstPod1IPv6, &dstPod2IPv6), + ) + + ginkgo.DescribeTable("Limits egress traffic targeting an individual pod by protocol through a NetworkQoS spec", + func(skipThisTableEntry *bool, dst1IP *string) { + if *skipThisTableEntry { + return + } + rate := 5000 + // dest pod for test with protocol + dstPod1, err := createPod(f, dstPod1Name, dstNode, dstPodNamespace, []string{"bash", "-c", "iperf3 -s"}, map[string]string{"component": "nqos-test-tcp"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "iperf3") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod1IPv4, dstPod1IPv6 = getPodAddresses(dstPod1) + bps := iperf3Test(f, srcPodName, *dst1IP, 5201) + gomega.Expect(bps/1000 > float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + // apply networkqos spec + networkQoSSpec := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1alpha1 +kind: NetworkQoS +metadata: + namespace: %s + name: %s +spec: + podSelector: + matchLabels: + component: nqos-test-src + priority: 53 + egress: + - dscp: 2 + bandwidth: + rate: %d + classifier: + ports: + - protocol: TCP + to: + - podSelector: + matchLabels: + component: nqos-test-tcp + namespaceSelector: + matchLabels: + app: nqos-test +`, f.Namespace.Name, nqosSpecName, rate) + if err := os.WriteFile(networkQoSYaml, []byte(networkQoSSpec), 0644); err != nil { + framework.Failf("Unable to write CRD to disk: 
%v", err) + } + defer func() { + if err := os.Remove(networkQoSYaml); err != nil { + framework.Logf("Unable to remove the CRD file from disk: %v", err) + } + }() + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", networkQoSYaml) + framework.Logf("NetworkQoS applied") + waitForNetworkQoSApplied(f.Namespace.Name) + bps = iperf3Test(f, srcPodName, *dst1IP, 5201) + gomega.Expect(bps/1000 <= float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + }, + ginkgo.Entry("ipv4", &skipIpv4, &dstPod1IPv4), + ginkgo.Entry("ipv6", &skipIpv6, &dstPod1IPv6), + ) + + ginkgo.DescribeTable("Limits egress traffic targeting a pod by protocol and port through a NetworkQoS spec", + func(skipThisTableEntry *bool, dst1IP *string) { + if *skipThisTableEntry { + return + } + rate := 5000 + // dest pod for test with protocol and port + dstPod1, err := createPod(f, dstPod1Name, dstNode, dstPodNamespace, []string{"bash", "-c", "iperf3 -s -p 80"}, map[string]string{"component": "nqos-test-proto-and-port"}, func(p *corev1.Pod) { + p.Spec.Containers[0].Image = podImage + }) + framework.ExpectNoError(err) + gomega.Eventually(func() error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", dstPod1Name, "--", "which", "iperf3") + return err + + }, 60*time.Second, 1*time.Second).ShouldNot(gomega.HaveOccurred()) + dstPod1IPv4, dstPod1IPv6 = getPodAddresses(dstPod1) + bps := iperf3Test(f, srcPodName, *dst1IP, 80) + gomega.Expect(bps/1000 > float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + // apply networkqos spec + networkQoSSpec := fmt.Sprintf(` +apiVersion: k8s.ovn.org/v1alpha1 +kind: NetworkQoS +metadata: + namespace: %s + name: %s +spec: + podSelector: + matchLabels: + component: nqos-test-src + priority: 54 + egress: + - dscp: 3 + bandwidth: + rate: %d + classifier: + ports: + - protocol: TCP + port: 80 + to: + - podSelector: + matchLabels: + component: nqos-test-proto-and-port + namespaceSelector: + matchLabels: + app: nqos-test +`, f.Namespace.Name, nqosSpecName, rate) + if err := os.WriteFile(networkQoSYaml, []byte(networkQoSSpec), 0644); err != nil { + framework.Failf("Unable to write CRD to disk: %v", err) + } + defer func() { + if err := os.Remove(networkQoSYaml); err != nil { + framework.Logf("Unable to remove the CRD file from disk: %v", err) + } + }() + e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "create", "-f", networkQoSYaml) + framework.Logf("NetworkQoS applied") + waitForNetworkQoSApplied(f.Namespace.Name) + bps = iperf3Test(f, srcPodName, *dst1IP, 80) + gomega.Expect(bps/1000 <= float64(rate)*bandwidthFluctuation).To(gomega.BeTrue()) + }, + ginkgo.Entry("ipv4", &skipIpv4, &dstPod1IPv4), + ginkgo.Entry("ipv6", &skipIpv6, &dstPod1IPv6), + ) + + ginkgo.AfterEach(func() { + err := f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), dstPodNamespace, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "Error deleting Namespace %v: %v", dstPodNamespace, err) + }) +}) + +func pingExpectDscp(f *framework.Framework, srcPod, dstPodNamespace, dstPod, dstPodIP, tcpDumpTpl string, dscp int) { + tcpDumpSync := errgroup.Group{} + pingSync := errgroup.Group{} + + checkDSCPOnPod := func(pod string, dscp int) error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", pod, "--", "timeout", "10", + "tcpdump", "-i", "any", "-c", "1", "-v", fmt.Sprintf(tcpDumpTpl, dscp)) + return err + } + + pingFromSrcPod := func(pod, dst string) error { + _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod, "--", "ping", "-c", "5", dst) + return err + } + + tcpDumpSync.Go(func() error 
{ + return checkDSCPOnPod(dstPod, dscp) + }) + pingSync.Go(func() error { + return pingFromSrcPod(srcPod, dstPodIP) + }) + err := pingSync.Wait() + framework.ExpectNoError(err, "Failed to ping dst pod") + err = tcpDumpSync.Wait() + framework.ExpectNoError(err, "Failed to detect ping with correct DSCP on pod") +} + +func netcatExpectDscp(f *framework.Framework, srcPod, dstPodNamespace, dstPod, dstPodIP, tcpDumpTpl string, port, dscp int) { + tcpDumpSync := errgroup.Group{} + netcatSync := errgroup.Group{} + + checkDSCPOnPod := func(pod string, dscp int) error { + _, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", pod, "--", "timeout", "10", + "tcpdump", "-i", "any", "-c", "1", "-v", fmt.Sprintf(tcpDumpTpl, dscp)) + return err + } + + netcatFromSrcPod := func(pod, dst string) error { + _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod, "--", "bash", "-c", fmt.Sprintf("for i in {1..5}; do nc -vz -w 1 %s %d; sleep 1; done", dst, port)) + return err + } + + tcpDumpSync.Go(func() error { + return checkDSCPOnPod(dstPod, dscp) + }) + netcatSync.Go(func() error { + return netcatFromSrcPod(srcPod, dstPodIP) + }) + err := netcatSync.Wait() + framework.ExpectNoError(err, "Failed to connect to dst pod") + err = tcpDumpSync.Wait() + framework.ExpectNoError(err, "Failed to detect packets with correct DSCP on pod") +} + +func iperf3Test(f *framework.Framework, srcPod, dstIP string, port int, protocol ...string) float64 { + iperf3Sync := errgroup.Group{} + + iperfTest := func(pod, destIP string, port int, bps *float64) error { + args := []string{"exec", pod, "--", "iperf3", "-c", destIP, "-p", strconv.Itoa(port), "-J"} + if len(protocol) > 0 && protocol[0] == "udp" { + args = append(args, "-u", "-b", "0") + } + output, err := e2ekubectl.RunKubectl(f.Namespace.Name, args...) 
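+	// `iperf3 -J` emits a single JSON report; the aggregate throughput sits under
+	// end.sum_sent.bits_per_second for TCP runs and under end.sum.bits_per_second
+	// for UDP runs (-u), hence the sum_sent/sum fallback in the parsing below.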
+ if err != nil { + return err + } + var data map[string]interface{} + err = json.Unmarshal([]byte(output), &data) + if err != nil { + return err + } + end := data["end"].(map[string]interface{}) + if sum_sent, ok := end["sum_sent"]; ok { + *bps = sum_sent.(map[string]interface{})["bits_per_second"].(float64) + } else if sum, ok := end["sum"]; ok { + *bps = sum.(map[string]interface{})["bits_per_second"].(float64) + } + return nil + } + bps := 0.0 + iperf3Sync.Go(func() error { + return iperfTest(srcPod, dstIP, port, &bps) + }) + err := iperf3Sync.Wait() + framework.ExpectNoError(err, fmt.Sprintf("Failed to run iperf3 test for IP %s", dstIP)) + return bps +} + +func twoStreamIperf3Tests(f *framework.Framework, srcPod, dstPod1IP, dstPod2IP string, port int) float64 { + iperf3Sync1 := errgroup.Group{} + iperf3Sync2 := errgroup.Group{} + + iperfTest := func(pod, destIP string, port int, bps *float64) error { + output, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod, "--", "iperf3", "-c", destIP, "-p", strconv.Itoa(port), "-J") + if err != nil { + return err + } + var data map[string]interface{} + err = json.Unmarshal([]byte(output), &data) + if err != nil { + return err + } + end := data["end"].(map[string]interface{}) + sum_sent := end["sum_sent"].(map[string]interface{}) + *bps = sum_sent["bits_per_second"].(float64) + return nil + } + + bps1 := 0.0 + bps2 := 0.0 + + iperf3Sync1.Go(func() error { + return iperfTest(srcPod, dstPod1IP, port, &bps1) + }) + iperf3Sync2.Go(func() error { + return iperfTest(srcPod, dstPod2IP, port, &bps2) + }) + err := iperf3Sync1.Wait() + framework.ExpectNoError(err, fmt.Sprintf("Failed to run iperf3 test for IP %s", dstPod1IP)) + err = iperf3Sync2.Wait() + framework.ExpectNoError(err, fmt.Sprintf("Failed to run iperf3 test for IP %s", dstPod2IP)) + return bps1 + bps2 +} + +func pingExpectNoDscp(f *framework.Framework, srcPod, dstPodNamespace, dstPod, dstPodIP, tcpDumpTpl string, dscp int) { + tcpDumpSync := errgroup.Group{} + pingSync := errgroup.Group{} + + checkDSCPOnPod := func(pod string, dscp int) error { + output, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", pod, "--", "timeout", "10", + "tcpdump", "-i", "any", "-c", "1", "-v", fmt.Sprintf(tcpDumpTpl, dscp)) + if err != nil { + return err + } + if len(strings.TrimSpace(output)) == 0 { + return fmt.Errorf("no packets captured") + } + return nil + } + + pingFromSrcPod := func(pod, dst string) error { + _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod, "--", "ping", "-c", "5", dst) + return err + } + + tcpDumpSync.Go(func() error { + return checkDSCPOnPod(dstPod, dscp) + }) + pingSync.Go(func() error { + return pingFromSrcPod(srcPod, dstPodIP) + }) + err := pingSync.Wait() + gomega.Expect(err).To(gomega.BeNil()) + err = tcpDumpSync.Wait() + gomega.Expect(err).To(gomega.HaveOccurred()) +} + +func netcatExpectNoDscp(f *framework.Framework, srcPod, dstPodNamespace, dstPod, dstPodIP, tcpDumpTpl string, port, dscp int) { + tcpDumpSync := errgroup.Group{} + netcatSync := errgroup.Group{} + + checkDSCPOnPod := func(pod string, dscp int) error { + output, err := e2ekubectl.RunKubectl(dstPodNamespace, "exec", pod, "--", "timeout", "10", + "tcpdump", "-i", "any", "-c", "1", "-v", fmt.Sprintf(tcpDumpTpl, dscp)) + if err != nil { + return err + } + if len(strings.TrimSpace(output)) == 0 { + return fmt.Errorf("no packets captured") + } + return nil + } + + netcatFromSrcPod := func(pod, dst string) error { + _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", pod, "--", "bash", "-c", 
fmt.Sprintf("for i in {1..5}; do nc -vz -w 1 %s %d; sleep 1; done", dst, port)) + return err + } + + tcpDumpSync.Go(func() error { + return checkDSCPOnPod(dstPod, dscp) + }) + netcatSync.Go(func() error { + return netcatFromSrcPod(srcPod, dstPodIP) + }) + err := netcatSync.Wait() + framework.ExpectNoError(err, "Failed to connect to dst pod") + err = tcpDumpSync.Wait() + gomega.Expect(err).To(gomega.HaveOccurred()) +} diff --git a/test/e2e/node_ip_mac_migration.go b/test/e2e/node_ip_mac_migration.go index 68d5e0d654..d84ce6d737 100644 --- a/test/e2e/node_ip_mac_migration.go +++ b/test/e2e/node_ip_mac_migration.go @@ -6,9 +6,9 @@ import ( "encoding/binary" "fmt" "math/big" - "math/rand" "net" "os" + "os/exec" "path" "regexp" "strings" @@ -17,6 +17,11 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -39,10 +44,7 @@ var _ = Describe("Node IP and MAC address migration", func() { pollingInterval = 10 settleTimeout = 10 egressIPYaml = "egressip.yaml" - externalContainerImage = "registry.k8s.io/e2e-test-images/agnhost:2.26" - ciNetworkName = "kind" externalContainerName = "ip-migration-external" - externalContainerPort = "80" externalContainerEndpoint = "/clientip" egressIPYamlTemplate = `apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -78,25 +80,21 @@ spec: egressIP string assignedNodePort int32 ovnkPod v1.Pod - - podLabels = map[string]string{ + f = wrappedTestFramework(namespacePrefix) + providerCtx infraapi.Context + externalContainer infraapi.ExternalContainer + podLabels = map[string]string{ "app": "ip-migration-test", } - podCommand = []string{"/bin/bash", "-c", "/agnhost netexec --http-port 8000"} - externalContainerCommand = []string{"netexec", "--http-port=" + externalContainerPort} - + podCommand = []string{"/bin/bash", "-c", "/agnhost netexec --http-port 8000"} updateKubeletIPAddressMsg = map[bool]string{ true: "update kubelet first, the IP address later", false: "update the IP address first, kubelet later", } - - f = wrappedTestFramework(namespacePrefix) - - udpPort = int32(rand.Intn(1000) + 10000) - udpPortS = fmt.Sprintf("%d", udpPort) ) BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() By("Creating the temp directory") var err error tmpDirIPMigration, err = os.MkdirTemp("", "e2e") @@ -129,14 +127,17 @@ spec: By("Creating a cluster external container") externalContainerIPs = make(map[int]string) - externalContainerIPs[4], externalContainerIPs[6] = createClusterExternalContainer(externalContainerName, - externalContainerImage, []string{"--network", ciNetworkName, "-P"}, externalContainerCommand) + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer = infraapi.ExternalContainer{Name: externalContainerName, Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to create external container") + 
externalContainerIPs[4], externalContainerIPs[6] = externalContainer.GetIPv4(), externalContainer.GetIPv6() }) AfterEach(func() { - By("Removing the external container") - deleteClusterExternalContainer(externalContainerName) - By("Removing the temp directory") Expect(os.RemoveAll(tmpDirIPMigration)).To(Succeed()) }) @@ -202,7 +203,7 @@ spec: }, pollingTimeout, pollingInterval).Should(BeTrue()) By(fmt.Sprintf("Finding worker node %s's IPv%d migration IP address", workerNode.Name, ipAddrFamily)) - // Pick something at the end of the range to avoid conflicts with the kind / docker network setup. + // Pick something at the end of the range to avoid conflicts with existing allocated IPs. // Also exclude the current node IPs and the egressIP (if already selected). var err error migrationWorkerNodeIP, err = findLastFreeSubnetIP( @@ -222,7 +223,7 @@ spec: true) Expect(err).NotTo(HaveOccurred()) - ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) @@ -230,6 +231,9 @@ spec: Expect(ovnkubeNodePods.Items).To(HaveLen(1)) ovnkubePodWorkerNode := ovnkubeNodePods.Items[0] + err = e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, ovnkubePodWorkerNode.GetName(), ovnkubePodWorkerNode.GetNamespace(), 200*time.Second) + framework.ExpectNoError(err, "failed waiting for ovnkube to be ready") + Eventually(func() bool { By("waiting for the ovn-encap-ip to be reconfigured") return isOVNEncapIPReady(workerNode.Name, workerNodeIPs[ipAddrFamily], ovnkubePodWorkerNode.Name) @@ -259,14 +263,15 @@ spec: By("Setting rollbackNeeded to true") rollbackNeeded = true - ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) Expect(err).NotTo(HaveOccurred()) Expect(ovnkubeNodePods.Items).To(HaveLen(1)) ovnkubePodWorkerNode := ovnkubeNodePods.Items[0] - + err = e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, ovnkubePodWorkerNode.GetName(), ovnkubePodWorkerNode.GetNamespace(), 200*time.Second) + framework.ExpectNoError(err, "failed waiting for ovnkube to be ready") Eventually(func() bool { By("waiting for the ovn-encap-ip to be reconfigured") return isOVNEncapIPReady(workerNode.Name, migrationWorkerNodeIP, ovnkubePodWorkerNode.Name) @@ -326,9 +331,8 @@ spec: } Eventually(func() bool { By("Checking the egress IP") - res, err := targetExternalContainerConnectToEndpoint(externalContainerName, - externalContainerIPs[ipAddrFamily], externalContainerPort, externalContainerEndpoint, - podWorkerNode.Name, f.Namespace.Name, expectedAnswer) + res, err := targetExternalContainerConnectToEndpoint(externalContainerIPs[ipAddrFamily], + externalContainer.ExtPort, externalContainerEndpoint, podWorkerNode.Name, f.Namespace.Name, expectedAnswer) if err != nil { framework.Logf("Current verification failed with %s", err) return false @@ -359,13 +363,15 @@ spec: By("Setting rollbackNeeded to true") rollbackNeeded = true - ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnkubeNodePods, err := 
f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) Expect(err).NotTo(HaveOccurred()) Expect(ovnkubeNodePods.Items).To(HaveLen(1)) ovnkubePodWorkerNode := ovnkubeNodePods.Items[0] + err = e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, ovnkubePodWorkerNode.GetName(), ovnkubePodWorkerNode.GetNamespace(), 200*time.Second) + framework.ExpectNoError(err, "failed waiting for ovnkube to be ready") Eventually(func() bool { By("waiting for the ovn-encap-ip to be reconfigured") @@ -382,9 +388,8 @@ spec: } Eventually(func() bool { By("Checking the egress IP") - res, err := targetExternalContainerConnectToEndpoint(externalContainerName, - externalContainerIPs[ipAddrFamily], externalContainerPort, externalContainerEndpoint, - podWorkerNode.Name, f.Namespace.Name, expectedAnswer) + res, err := targetExternalContainerConnectToEndpoint(externalContainerIPs[ipAddrFamily], + externalContainer.ExtPort, externalContainerEndpoint, podWorkerNode.Name, f.Namespace.Name, expectedAnswer) if err != nil { framework.Logf("Current verification failed with %s", err) return false @@ -412,8 +417,10 @@ spec: BeforeEach(func() { By("creating a host-network backend pod") jig := e2eservice.NewTestJig(f.ClientSet, f.Namespace.Name, serviceName) - serverPod := e2epod.NewAgnhostPod(f.Namespace.Name, podName, nil, nil, []v1.ContainerPort{{ContainerPort: udpPort}, {ContainerPort: udpPort, Protocol: "UDP"}}, - "netexec", "--udp-port="+udpPortS) + udpPort := infraprovider.Get().GetK8HostPort() + serverPod := e2epod.NewAgnhostPod(f.Namespace.Name, podName, nil, nil, + []v1.ContainerPort{{ContainerPort: int32(udpPort)}, {ContainerPort: int32(udpPort), Protocol: "UDP"}}, + "netexec", fmt.Sprintf("--udp-port=%d", udpPort)) serverPod.Labels = jig.Labels serverPod.Spec.HostNetwork = true serverPod.Spec.NodeName = workerNode.Name @@ -438,7 +445,7 @@ spec: assignedNodePort = svc.Spec.Ports[0].NodePort // find the ovn-kube node pod on this node - pods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + pods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) @@ -488,13 +495,15 @@ spec: By("Setting rollbackNeeded to true") rollbackNeeded = true - ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) Expect(err).NotTo(HaveOccurred()) Expect(ovnkubeNodePods.Items).To(HaveLen(1)) ovnkubePodWorkerNode := ovnkubeNodePods.Items[0] + err = e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, ovnkubePodWorkerNode.GetName(), ovnkubePodWorkerNode.GetNamespace(), 200*time.Second) + framework.ExpectNoError(err, "failed waiting for ovnkube to be ready") Eventually(func() bool { By("waiting for the ovn-encap-ip to be reconfigured") @@ -522,8 +531,7 @@ spec: } // Due to potential k8s bug described here: https://github.com/ovn-org/ovn-kubernetes/issues/4073 // We may need to restart kubelet for the backend pod to update its host networked IP address - restartCmd := []string{"docker", "exec", 
workerNode.Name, "systemctl", "restart", "kubelet"} - _, restartErr := runCommand(restartCmd...) + _, restartErr := infraprovider.Get().ExecK8NodeCommand(workerNode.Name, []string{"systemctl", "restart", "kubelet"}) framework.ExpectNoError(restartErr) return false, nil }) @@ -537,7 +545,7 @@ spec: When("when MAC address changes", func() { BeforeEach(func() { By("Storing original MAC") - ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnkubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + workerNode.Name, }) @@ -570,7 +578,7 @@ spec: Name: serviceName, Protocol: v1.ProtocolUDP, Port: 80, - TargetPort: intstr.FromInt(int(udpPort)), + TargetPort: intstr.FromInt(8080), }, } s.Spec.Type = v1.ServiceTypeNodePort @@ -637,16 +645,17 @@ func checkFlowsForMAC(ovnkPod v1.Pod, mac net.HardwareAddr) error { } func setMACAddress(ovnkubePod v1.Pod, mac string) error { - cmd := []string{"kubectl", "-n", ovnkubePod.Namespace, "exec", ovnkubePod.Name, "-c", "ovn-controller", - "--", "ovs-vsctl", "set", "bridge", "breth0", fmt.Sprintf("other-config:hwaddr=%s", mac)} - _, err := runCommand(cmd...) - return err + cmd := fmt.Sprintf("ovs-vsctl set bridge %s other-config:hwaddr=%s", deploymentconfig.Get().ExternalBridgeName(), mac) + _, err := e2epodoutput.RunHostCmd(ovnkubePod.Namespace, ovnkubePod.Name, cmd) + if err != nil { + return fmt.Errorf("failed to set MAC address on ovs bridge: %v", err) + } + return nil } func getMACAddress(ovnkubePod v1.Pod) (net.HardwareAddr, error) { - cmd := []string{"kubectl", "-n", ovnkubePod.Namespace, "exec", ovnkubePod.Name, "-c", "ovn-controller", - "--", "ip", "link", "show", "breth0"} - output, err := runCommand(cmd...) 
+ cmd := fmt.Sprintf("ip link show %s", deploymentconfig.Get().ExternalBridgeName()) + output, err := e2epodoutput.RunHostCmd(ovnkubePod.Namespace, ovnkubePod.Name, cmd) if err != nil { return nil, fmt.Errorf("failed to get ip link output: %w", err) } @@ -675,10 +684,48 @@ func getNodeInternalAddresses(node *v1.Node) (string, string) { return v4Addr, v6Addr } +// findIPAddressMaskInterfaceOnNode finds the string "/" and interface name on node +func findIPAddressMaskInterfaceOnNode(nodeName, ip string) (net.IPNet, string, error) { + ipAddressCmdOutput, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "-o", "address"}) + if err != nil { + return net.IPNet{}, "", err + } + re, err := regexp.Compile(fmt.Sprintf("%s/[0-9]{1,3}", ip)) + if err != nil { + return net.IPNet{}, "", err + } + ipAddressMask := "" + iface := "" + scanner := bufio.NewScanner(strings.NewReader(ipAddressCmdOutput)) + for scanner.Scan() { + line := scanner.Text() + ipAddressMask = re.FindString(line) + if ipAddressMask != "" { + if exploded := strings.Fields(line); len(exploded) > 1 { + iface = exploded[1] + } + break + } + } + if ipAddressMask == "" { + return net.IPNet{}, "", fmt.Errorf("IP address and mask were not found via `ip address` for node %s with IP %s", + nodeName, ip) + } + if iface == "" { + return net.IPNet{}, "", fmt.Errorf("interface not found for node %s with IP %s", + nodeName, ip) + } + parsedNetIP, parsedNetCIDR, err := net.ParseCIDR(ipAddressMask) + if err != nil { + return net.IPNet{}, "", err + } + return net.IPNet{IP: parsedNetIP, Mask: parsedNetCIDR.Mask}, iface, nil +} + // findIpAddressMaskOnHost finds the string "/" and interface name on container for // nodeIP. func findIPAddressMaskInterfaceOnHost(containerName, containerIP string) (net.IPNet, string, error) { - ipAddressCmdOutput, err := runCommand("docker", "exec", containerName, "ip", "-o", "address") + ipAddressCmdOutput, err := exec.Command("docker", "exec", containerName, "ip", "-o", "address").CombinedOutput() if err != nil { return net.IPNet{}, "", err } @@ -688,7 +735,7 @@ func findIPAddressMaskInterfaceOnHost(containerName, containerIP string) (net.IP } ipAddressMask := "" iface := "" - scanner := bufio.NewScanner(strings.NewReader(ipAddressCmdOutput)) + scanner := bufio.NewScanner(strings.NewReader(string(ipAddressCmdOutput))) for scanner.Scan() { line := scanner.Text() ipAddressMask = re.FindString(line) @@ -816,19 +863,17 @@ func subnetBroadcastIP(ipnet net.IPNet) net.IP { return net.IP(byteTargetIP) } -// isAddressReachableFromContainer will curl towards targetIP. If the curl succeeds, return true. Otherwise, check the +// isAddressReachableFromNode will curl towards targetIP. If the curl succeeds, return true. Otherwise, check the // node's neighbor table. If a neighbor entry for targetIP exists, return true, false otherwise. We use curl because // it's installed by default in the ubuntu kind containers; ping/arping are unfortunately not available. func isAddressReachableFromContainer(containerName, targetIP string) (bool, error) { // There's no ping/arping inside the default containers, so just use curl instead. It's good enough to trigger // ARP resolution. - cmd := []string{"docker", "exec", containerName} if utilnet.IsIPv6String(targetIP) { targetIP = fmt.Sprintf("[%s]", targetIP) } curlCommand := strings.Split(fmt.Sprintf("curl -g -q -s http://%s:%d", targetIP, 80), " ") - cmd = append(cmd, curlCommand...) - _, err := runCommand(cmd...) 
+ _, err := infraprovider.Get().ExecK8NodeCommand(containerName, curlCommand) // If this curl works, then the node is logically reachable, shortcut. if err == nil { return true, nil @@ -836,7 +881,7 @@ func isAddressReachableFromContainer(containerName, targetIP string) (bool, erro // Now, check the neighbor table and if the entry does not have REACHABLE or STALE or PERMANENT, then this must be // an unreachable entry (could be FAILED or INCOMPLETE). - ipNeighborOutput, err := runCommand("docker", "exec", containerName, "ip", "neigh") + ipNeighborOutput, err := infraprovider.Get().ExecK8NodeCommand(containerName, []string{"ip", "neigh"}) if err != nil { return false, err } @@ -861,14 +906,12 @@ func isAddressReachableFromContainer(containerName, targetIP string) (bool, erro func isOVNEncapIPReady(nodeName, nodeIP, ovnkubePodName string) bool { framework.Logf("Verifying ovn-encap-ip for node %s", nodeName) - cmd := []string{"kubectl", "-n", ovnNamespace, "exec", ovnkubePodName, "-c", "ovn-controller", - "--", "ovs-vsctl", "get", "open_vswitch", ".", "external-ids:ovn-encap-ip"} - output, err := runCommand(cmd...) + cmd := "ovs-vsctl get open_vswitch . external-ids:ovn-encap-ip" + output, err := e2epodoutput.RunHostCmdWithRetries(deploymentconfig.Get().OVNKubernetesNamespace(), ovnkubePodName, cmd, 10*time.Millisecond, 10*time.Second) if err != nil { - framework.Logf("Failed to get ovn-encap-ip: %q", err) + framework.Logf("when running command on pod %s: %v", ovnkubePodName, err) return false } - output = strings.Replace(output, "\"", "", -1) output = strings.Replace(output, "\n", "", -1) @@ -890,13 +933,14 @@ func migrateWorkerNodeIP(nodeName, fromIP, targetIP string, invertOrder bool) (e if err != nil { for _, cmd := range cleanupCommands { framework.Logf("Attempting cleanup with command %q", cmd) - runCommand(cmd...) + output, err := infraprovider.Get().ExecK8NodeCommand(nodeName, cmd) + framework.ExpectNoError(err, "failed to cleanup node IP migration on node %s: %s", nodeName, output) } } }() framework.Logf("Finding fromIP %s on host %s", fromIP, nodeName) - parsedNetIPMask, iface, err := findIPAddressMaskInterfaceOnHost(nodeName, fromIP) + parsedNetIPMask, iface, err := findIPAddressMaskInterfaceOnNode(nodeName, fromIP) if err != nil { return err } @@ -912,22 +956,20 @@ func migrateWorkerNodeIP(nodeName, fromIP, targetIP string, invertOrder bool) (e newIPMask := targetIP + "/" + mask framework.Logf("Adding new IP address %s to node %s", newIPMask, nodeName) // Add cleanup command. - cleanupCmd := []string{"docker", "exec", nodeName, "ip", "address", "del", newIPMask, "dev", iface} + cleanupCmd := []string{"ip", "address", "del", newIPMask, "dev", iface} cleanupCommands = append(cleanupCommands, cleanupCmd) // Run command. - cmd := []string{"docker", "exec", nodeName, "ip", "address", "add", newIPMask, "dev", iface} - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "address", "add", newIPMask, "dev", iface}) if err != nil { - return err + return fmt.Errorf("failed to add new IP %s to interface %s on node %s: %v", newIPMask, iface, nodeName, err) } // Delete current IP address. On rollback, first add the old IP and then delete the new one. framework.Logf("Deleting current IP address %s from node %s", parsedNetIPMask.String(), nodeName) // Add cleanup command. 
- cleanupCmd = []string{"docker", "exec", nodeName, "ip", "address", "add", parsedNetIPMask.String(), "dev", iface} + cleanupCmd = []string{"ip", "address", "add", parsedNetIPMask.String(), "dev", iface} cleanupCommands = append([][]string{cleanupCmd}, cleanupCommands...) // Run command. - cmd = []string{"docker", "exec", nodeName, "ip", "address", "del", parsedNetIPMask.String(), "dev", iface} - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "address", "del", parsedNetIPMask.String(), "dev", iface}) if err != nil { return err } @@ -938,23 +980,21 @@ func migrateWorkerNodeIP(nodeName, fromIP, targetIP string, invertOrder bool) (e // Change kubeadm-flags.env IP. framework.Logf("Modifying kubelet configuration for node %s", nodeName) // Add cleanup commands. - cleanupCmd := []string{"docker", "exec", nodeName, "sed", "-i", fmt.Sprintf("s/node-ip=%s/node-ip=%s/", targetIP, fromIP), + cleanupCmd := []string{"sed", "-i", fmt.Sprintf("s/node-ip=%s/node-ip=%s/", targetIP, fromIP), "/var/lib/kubelet/kubeadm-flags.env"} cleanupCommands = append(cleanupCommands, cleanupCmd) - cleanupCmd = []string{"docker", "exec", nodeName, "systemctl", "restart", "kubelet"} + cleanupCmd = []string{"systemctl", "restart", "kubelet"} cleanupCommands = append(cleanupCommands, cleanupCmd) // Run command. - cmd := []string{"docker", "exec", nodeName, "sed", "-i", fmt.Sprintf("s/node-ip=%s/node-ip=%s/", fromIP, targetIP), + cmd := []string{"sed", "-i", fmt.Sprintf("s/node-ip=%s/node-ip=%s/", fromIP, targetIP), "/var/lib/kubelet/kubeadm-flags.env"} - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, cmd) if err != nil { - return err + return fmt.Errorf("failed to change kubelet node IP config: %v", err) } - // Restart kubelet. framework.Logf("Restarting kubelet on node %s", nodeName) - cmd = []string{"docker", "exec", nodeName, "systemctl", "restart", "kubelet"} - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"systemctl", "restart", "kubelet"}) if err != nil { return err } @@ -976,9 +1016,9 @@ func migrateWorkerNodeIP(nodeName, fromIP, targetIP string, invertOrder bool) (e // targetExternalContainerConnectToEndpoint targets the external test container from the specified pod and compares // expectedAnswer to the actual answer. 
-func targetExternalContainerConnectToEndpoint(externalContainerName, externalContainerIP, externalContainerPort, +func targetExternalContainerConnectToEndpoint(externalContainerIP string, externalContainerPort uint16, externalContainerEndpoint, podName, podNamespace string, expectedAnswer string) (bool, error) { - containerIPAndPort := net.JoinHostPort(externalContainerIP, externalContainerPort) + containerIPAndPort := net.JoinHostPort(externalContainerIP, fmt.Sprintf("%d", externalContainerPort)) u := path.Join(containerIPAndPort, externalContainerEndpoint) output, err := e2ekubectl.RunKubectl(podNamespace, "exec", podName, "--", "curl", "--max-time", "2", u) if err != nil { diff --git a/test/e2e/ovspinning.go b/test/e2e/ovspinning.go index fed38ac4f4..af72285ead 100644 --- a/test/e2e/ovspinning.go +++ b/test/e2e/ovspinning.go @@ -1,8 +1,16 @@ package e2e import ( + "context" + "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" ) var _ = ginkgo.Describe("OVS CPU affinity pinning", func() { @@ -10,20 +18,26 @@ var _ = ginkgo.Describe("OVS CPU affinity pinning", func() { f := wrappedTestFramework("ovspinning") ginkgo.It("can be enabled on specific nodes by creating enable_dynamic_cpu_affinity file", func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 1)) + nodeWithEnabledOvsAffinityPinning := nodes.Items[0].Name + nodeWithDisabledOvsAffinityPinning := nodes.Items[1].Name - nodeWithEnabledOvsAffinityPinning := "ovn-worker2" - - _, err := runCommand(containerRuntime, "exec", nodeWithEnabledOvsAffinityPinning, "bash", "-c", "echo 1 > /etc/openvswitch/enable_dynamic_cpu_affinity") + _, err = infraprovider.Get().ExecK8NodeCommand(nodeWithEnabledOvsAffinityPinning, []string{"bash", "-c", "echo 1 > /etc/openvswitch/enable_dynamic_cpu_affinity"}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - restartOVNKubeNodePodsInParallel(f.ClientSet, ovnNamespace, "ovn-worker", "ovn-worker2") + err = restartOVNKubeNodePodsInParallel(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), nodeWithEnabledOvsAffinityPinning, nodeWithDisabledOvsAffinityPinning) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) - enabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, ovnNamespace, "ovn-worker2", ".*ovspinning_linux.go.*$") + enabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), + nodeWithEnabledOvsAffinityPinning, ".*ovspinning_linux.go.*$") gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Expect(enabledNodeLogs).To(gomega.ContainSubstring("Starting OVS daemon CPU pinning")) - disabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, ovnNamespace, "ovn-worker", ".*ovspinning_linux.go.*$") + disabledNodeLogs, err := getOVNKubePodLogsFiltered(f.ClientSet, deploymentconfig.Get().OVNKubernetesNamespace(), + nodeWithDisabledOvsAffinityPinning, ".*ovspinning_linux.go.*$") gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Expect(disabledNodeLogs).To(gomega.ContainSubstring("OVS CPU affinity pinning disabled")) }) diff --git a/test/e2e/pod.go b/test/e2e/pod.go index 515158eae4..f5b7b12aae 100644 --- a/test/e2e/pod.go +++ b/test/e2e/pod.go @@ -4,31 +4,46 @@ import ( "context" 
"fmt" "math/rand" + "net" "regexp" + "strings" "time" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" + e2eservice "k8s.io/kubernetes/test/e2e/framework/service" + e2eutilsnet "k8s.io/utils/net" ) var _ = ginkgo.Describe("Pod to external server PMTUD", func() { const ( - echoServerPodNameTemplate = "echo-server-pod-%d" - echoClientPodName = "echo-client-pod" - echoServerPodPortMin = 9800 - echoServerPodPortMax = 9899 - primaryNetworkName = "kind" + echoServerNameTemplate = "echo-server-%d" + echoClientPodName = "echo-client" ) + var providerCtx infraapi.Context f := wrappedTestFramework("pod2external-pmtud") + + ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() + }) + cleanupFn := func() {} ginkgo.AfterEach(func() { @@ -43,18 +58,16 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { // * Set up a external docker container as a server // * Query from client pod to server pod // Traffic Flow: - // Req: podA on nodeA -> nodeA switch -> nodeA cluster-route -> nodeA transit switch -> nodeA join switch -> nodeA GR -> nodeA ext switch -> nodeA br-ex -> underlay - // underlay -> server + // Req: podA on nodeA -> nodeA switch -> nodeA cluster-router -> nodeA join switch -> nodeA GR -> nodeA ext switch -> nodeA br-ex -> underlay + // -> server // Res: server sends large packet -> br-ex on nodeA -> nodeA ext-switch -> rtoe-GR port sends back needs frag thanks to gateway_mtu option // ICMP needs frag goes back to external server // server now fragments packets correctly. 
// NOTE: on LGW, the pkt exits via mp0 on nodeA and path is different than what is described above // Frag needed is sent by nodeA using ovn-k8s-mp0 interface mtu and not OVN's GR for flows where services are not involved in LGW ginkgo.When("a client ovnk pod targeting an external server is created", func() { - var serverPodPort int - var serverPodName string - var serverNodeInternalIPs []string - + var externalContainer infraapi.ExternalContainer + var externalContainerIPs []string var clientPod *v1.Pod var clientPodNodeName string @@ -86,30 +99,23 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) ginkgo.By("Creating the external server") - serverPodPort = rand.Intn(echoServerPodPortMax-echoServerPodPortMin) + echoServerPodPortMin - serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) - framework.Logf("Creating server pod listening on TCP and UDP port %d", serverPodPort) - agntHostCmds := []string{"netexec", "--http-port", fmt.Sprintf("%d", serverPodPort), "--udp-port", fmt.Sprintf("%d", serverPodPort)} - externalIpv4, externalIpv6 := createClusterExternalContainer(serverPodName, agnhostImage, - []string{"--network", "kind", "-P", "--cap-add", "NET_ADMIN"}, - agntHostCmds, - ) - + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainerName := fmt.Sprintf(echoServerNameTemplate, externalContainerPort) + framework.Logf("Creating external container server pod listening on TCP and UDP port %d", externalContainerPort) + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get provider primary network") + externalContainer = infraapi.ExternalContainer{Name: externalContainerName, Image: images.AgnHost(), Network: providerPrimaryNetwork, + Args: []string{"netexec", "--http-port", fmt.Sprintf("%d", externalContainerPort), "--udp-port", fmt.Sprintf("%d", externalContainerPort)}, + ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to create external container (%s)", externalContainer) if isIPv4Supported() { - serverNodeInternalIPs = append(serverNodeInternalIPs, externalIpv4) + gomega.Expect(externalContainer.GetIPv4()).ToNot(gomega.BeEmpty()) + externalContainerIPs = append(externalContainerIPs, externalContainer.GetIPv4()) } - if isIPv6Supported() { - serverNodeInternalIPs = append(serverNodeInternalIPs, externalIpv6) - } - - gomega.Expect(len(serverNodeInternalIPs)).To(gomega.BeNumerically(">", 0)) - }) - - ginkgo.AfterEach(func() { - ginkgo.By("Removing external container") - if len(serverPodName) > 0 { - deleteClusterExternalContainer(serverPodName) + gomega.Expect(externalContainer.GetIPv6()).ToNot(gomega.BeEmpty()) + externalContainerIPs = append(externalContainerIPs, fmt.Sprintf("[%s]", externalContainer.GetIPv6())) } }) @@ -118,13 +124,17 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { // The payload is transmitted to and echoed from the echo service for both HTTP and UDP tests. 
ginkgo.When("tests are run towards the agnhost echo server", func() { ginkgo.It("queries to the hostNetworked server pod on another node shall work for TCP", func() { + gomega.Expect(len(externalContainerIPs)).Should(gomega.BeNumerically(">", 0)) for _, size := range []string{"small", "large"} { - for _, serverNodeIP := range serverNodeInternalIPs { - ginkgo.By(fmt.Sprintf("Sending TCP %s payload to node IP %s "+ - "and expecting to receive the same payload", size, serverNodeIP)) - cmd := fmt.Sprintf("curl --max-time 10 -g -q -s http://%s:%d/echo?msg=%s", - serverNodeIP, - serverPodPort, + for _, externalContainerIP := range externalContainerIPs { + if externalContainerIP == "" { + framework.Failf("expected to retrieve external container %s IP but it wasnt found", externalContainer.Name) + } + ginkgo.By(fmt.Sprintf("Sending TCP %s payload to container IP %s "+ + "and expecting to receive the same payload", size, externalContainerIP)) + cmd := fmt.Sprintf("curl --max-time 10 -g -q -s http://%s:%s/echo?msg=%s", + externalContainerIP, + externalContainer.GetPortStr(), echoPayloads[size], ) framework.Logf("Testing TCP %s with command %q", size, cmd) @@ -140,23 +150,27 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { } }) ginkgo.It("queries to the hostNetworked server pod on another node shall work for UDP", func() { - clientNodeIPv4, clientNodeIPv6 := getContainerAddressesForNetwork(clientPodNodeName, primaryNetworkName) // we always want to fetch from primary network - clientnodeIP := clientNodeIPv4 - if IsIPv6Cluster(f.ClientSet) { - clientnodeIP = clientNodeIPv6 + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + primaryInf, err := infraprovider.Get().GetK8NodeNetworkInterface(clientPodNodeName, providerPrimaryNetwork) + framework.ExpectNoError(err, "failed to get provider primary network interface info") + clientnodeIP := primaryInf.IPv4 + if IsIPv6Cluster(f.ClientSet) && isIPv6Supported() { + clientnodeIP = fmt.Sprintf("[%s]", primaryInf.IPv6) } + gomega.Expect(clientnodeIP).NotTo(gomega.BeEmpty()) for _, size := range []string{"small", "large"} { - for _, serverNodeIP := range serverNodeInternalIPs { + for _, externalContainerIP := range externalContainerIPs { if size == "large" { // Flushing the IP route cache will remove any routes in the cache // that are a result of receiving a "need to frag" packet. ginkgo.By("Flushing the ip route cache") - stdout, err := runCommand(containerRuntime, "exec", "-i", serverPodName, "ip", "route", "flush", "cache") + stdout, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "flush", "cache"}) framework.ExpectNoError(err, "Flushing the ip route cache failed") - framework.Logf("Flushed cache on %s", serverPodName) + framework.Logf("Flushed cache on %s", externalContainer.GetName()) // List the current IP route cache for informative purposes. cmd := fmt.Sprintf("ip route get %s", clientnodeIP) - stdout, err = runCommand(containerRuntime, "exec", "-i", serverPodName, "ip", "route", "get", clientnodeIP) + stdout, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "get", clientnodeIP}) framework.ExpectNoError(err, "Listing IP route cache") framework.Logf("%s: %s", cmd, stdout) } @@ -165,12 +179,12 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { // message, subsequent requests then should succeed. 
gomega.Eventually(func() error { ginkgo.By(fmt.Sprintf("Sending UDP %s payload to server IP %s "+ - "and expecting to receive the same payload", size, serverNodeIP)) + "and expecting to receive the same payload", size, externalContainerIP)) // Send payload via UDP. - cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", + cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %s", echoPayloads[size], - serverNodeIP, - serverPodPort, + externalContainerIP, + externalContainer.GetPortStr(), ) framework.Logf("Testing UDP %s with command %q", size, cmd) stdout, err := e2epodoutput.RunHostCmd( @@ -187,7 +201,7 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { if size == "large" { ginkgo.By("Making sure that the ip route cache contains an MTU route") // Get IP route cache and make sure that it contains an MTU route on the server side. - stdout, err = runCommand(containerRuntime, "exec", "-i", serverPodName, "ip", "route", "get", clientnodeIP) + stdout, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "get", clientnodeIP}) if err != nil { return fmt.Errorf("could not list IP route cache using cmd: %s, err: %q", cmd, err) } @@ -203,8 +217,8 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { // flush this on all 3 nodes else we will run into the // bug: https://issues.redhat.com/browse/OCPBUGS-7609. // TODO: Revisit this once https://bugzilla.redhat.com/show_bug.cgi?id=2169839 is fixed. - ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: "name=ovnkube-node", + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app=ovnkube-node", }) if err != nil { framework.Failf("could not get ovnkube-node pods: %v", err) @@ -215,12 +229,12 @@ var _ = ginkgo.Describe("Pod to external server PMTUD", func() { if isInterconnectEnabled() { containerName = "ovnkube-controller" } - _, err := e2ekubectl.RunKubectl(ovnNamespace, "exec", ovnKubeNodePod.Name, "--container", containerName, "--", + _, err := e2ekubectl.RunKubectl(deploymentconfig.Get().OVNKubernetesNamespace(), "exec", ovnKubeNodePod.Name, "--container", containerName, "--", "ip", "route", "flush", "cache") framework.ExpectNoError(err, "Flushing the ip route cache failed") } - framework.Logf("Flushing the ip route cache on %s", serverPodName) - _, err = runCommand(containerRuntime, "exec", "-i", serverPodName, "ip", "route", "flush", "cache") + framework.Logf("Flushing the ip route cache on %s", externalContainer.GetName()) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "route", "flush", "cache"}) framework.ExpectNoError(err, "Flushing the ip route cache failed") } } @@ -233,7 +247,6 @@ var _ = ginkgo.Describe("Pod to pod TCP with low MTU", func() { const ( echoServerPodNameTemplate = "echo-server-pod-%d" echoClientPodName = "echo-client-pod" - serverPodPort = 9899 mtu = 1400 ) @@ -284,11 +297,11 @@ var _ = ginkgo.Describe("Pod to pod TCP with low MTU", func() { e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) ginkgo.By("Creating hostNetwork:false (ovnk) server pod") - serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) + serverPodName = fmt.Sprintf(echoServerPodNameTemplate, 8080) serverPod = e2epod.NewAgnhostPod(f.Namespace.Name, serverPodName, nil, nil, nil, "netexec", - "--http-port", fmt.Sprintf("%d", serverPodPort), - 
"--udp-port", fmt.Sprintf("%d", serverPodPort), + "--http-port", fmt.Sprintf("8080"), + "--udp-port", fmt.Sprintf("8080"), ) serverPod.ObjectMeta.Labels = map[string]string{ "app": serverPodName, @@ -326,11 +339,15 @@ var _ = ginkgo.Describe("Pod to pod TCP with low MTU", func() { ginkgo.When("MTU is lowered between the two nodes", func() { ginkgo.It("large queries to the server pod on another node shall work for TCP", func() { for _, serverPodIP := range serverPod.Status.PodIPs { + if e2eutilsnet.IsIPv6String(serverPodIP.IP) { + serverPodIP.IP = fmt.Sprintf("[%s]", serverPodIP) + } + ginkgo.By(fmt.Sprintf("Sending TCP large payload to server IP %s "+ "and expecting to receive the same payload", serverPodIP)) cmd := fmt.Sprintf("curl --max-time 10 -g -q -s http://%s:%d/echo?msg=%s", serverPodIP.IP, - serverPodPort, + 8080, payload, ) framework.Logf("Testing large TCP segments with command %q", cmd) @@ -357,3 +374,438 @@ var _ = ginkgo.Describe("Pod to pod TCP with low MTU", func() { }) }) }) + +var _ = ginkgo.Describe("blocking ICMP needs frag", func() { + const ( + echoServerPodNameTemplate = "echo-server-pod-%d" + echoClientPodName = "echo-client-pod" + serverPodPort = 80 + mtu = 1500 + serviceName = "testservice" + echoServicePortMin = 31200 + echoServicePortMax = 31299 + ) + + var ipCmd = []string{"ip"} + var cs clientset.Interface + var echoMtuRegex = regexp.MustCompile(`expires.*mtu.*1400`) + f := wrappedTestFramework("icmp-needs-frag") + cleanupFn := func() { + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app=ovnkube-node", + }) + if err != nil { + framework.Failf("could not get ovnkube-node pods: %v", err) + } + for _, ovnKubeNodePod := range ovnKubeNodePods.Items { + framework.Logf("Flushing the ip route cache on %s", ovnKubeNodePod.Name) + containerName := "ovnkube-node" + if isInterconnectEnabled() { + containerName = "ovnkube-controller" + } + _, err := e2ekubectl.RunKubectl(deploymentconfig.Get().OVNKubernetesNamespace(), "exec", ovnKubeNodePod.Name, "--container", containerName, "--", + "ip", "route", "flush", "cache") + framework.ExpectNoError(err, "Flushing the ip route cache failed") + } + } + + ginkgo.BeforeEach(func() { + cs = f.ClientSet + if IsIPv6Cluster(f.ClientSet) { + ipCmd = []string{"ip", "-6"} + } + }) + + ginkgo.AfterEach(func() { + cleanupFn() + }) + + ginkgo.When("a client host networked pod with targets a proxy node nodeport service with ovnk networked backend", func() { + var serverPod *v1.Pod + var serverPodNodeName string + var serverPodName string + var clientNode v1.Node + var nodePortNode v1.Node + + var clientPod *v1.Pod + var clientPodNodeName string + var nodePort int + payload := fmt.Sprintf("%01420d", 1) + + ginkgo.BeforeEach(func() { + ginkgo.By("Selecting 3 schedulable nodes") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) + + ginkgo.By("Selecting nodes for client pod and server host-networked pod") + serverPodNodeName = nodes.Items[0].Name + clientPodNodeName = nodes.Items[1].Name + clientNode = nodes.Items[1] + nodePortNode = nodes.Items[2] + nodePort = rand.Intn(echoServicePortMax-echoServicePortMin) + echoServicePortMin + + ginkgo.By("Creating hostNetwork:true (ovnk) client pod") + clientPod = e2epod.NewAgnhostPod(f.Namespace.Name, echoClientPodName, nil, nil, nil) + 
clientPod.Spec.NodeName = clientPodNodeName + clientPod.Spec.HostNetwork = true + for k := range clientPod.Spec.Containers { + if clientPod.Spec.Containers[k].Name == "agnhost-container" { + clientPod.Spec.Containers[k].Command = []string{ + "sleep", + "infinity", + } + } + clientPod.Spec.Containers[k].SecurityContext = &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Add: []v1.Capability{"NET_ADMIN"}, + }, + } + } + e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) + + ginkgo.By(fmt.Sprintf("Creating nodeport service with port: %d", nodePort)) + jig := e2eservice.NewTestJig(cs, f.Namespace.Name, serviceName) + _, err = jig.CreateUDPService(context.TODO(), func(svc *v1.Service) { + svc.Spec.Type = v1.ServiceTypeNodePort + svc.Spec.Ports[0].NodePort = int32(nodePort) + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Creating an ovnk server pod") + serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) + serverPod = e2epod.NewAgnhostPod(f.Namespace.Name, serverPodName, nil, nil, nil, + "netexec", + "--http-port", fmt.Sprintf("%d", serverPodPort), + "--udp-port", fmt.Sprintf("%d", serverPodPort), + ) + serverPod.ObjectMeta.Labels = map[string]string{ + "app": serverPodName, + } + serverPod.Spec.NodeName = serverPodNodeName + serverPod.Labels = jig.Labels + serverPod = e2epod.NewPodClient(f).CreateSync(context.TODO(), serverPod) + }) + + ginkgo.It("should be able to send large UDP packet and not get a route cache entry", func() { + // Flushing the IP route cache will remove any routes in the cache + // that are a result of receiving a "need to frag" packet. + ginkgo.By("Flushing the ip route cache") + cmd := append(ipCmd, "route", "flush", "cache") + stdout, err := infraprovider.Get().ExecK8NodeCommand(clientNode.Name, cmd) + framework.ExpectNoError(err, "Flushing the ip route cache failed") + framework.Logf("Flushed cache on %s", clientNode.Name) + proxyIP := nodePortNode.Status.Addresses[0].Address + // List the current IP route cache for informative purposes. + cmd = append(ipCmd, "route", "get", proxyIP) + stdout, err = infraprovider.Get().ExecK8NodeCommand(clientNode.Name, cmd) + framework.ExpectNoError(err, "Listing IP route cache") + framework.Logf("%s: %s", cmd, stdout) + + ginkgo.By(fmt.Sprintf("Sending UDP large payload to server IP %s ", proxyIP)) + // Send payload via UDP. + udpCmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", + payload, + proxyIP, + nodePort, + ) + stdout, err = e2epodoutput.RunHostCmd( + clientPod.Namespace, + clientPod.Name, + udpCmd) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.BeEmpty()) + + ginkgo.By(fmt.Sprintf("Making sure that the ip route cache does not contain an MTU route on node: %s", clientNode.Name)) + // Get IP route cache and make sure that it contains an MTU route on the server side. 
+ stdout, err = infraprovider.Get().ExecK8NodeCommand(clientNode.Name, cmd) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Route cache on server node %s", stdout) + if echoMtuRegex.Match([]byte(stdout)) { + ginkgo.Fail(fmt.Sprintf("Route cache has PMTUD value for proxy IP: %s, output: %s", proxyIP, stdout)) + } + }) + }) + + ginkgo.When("a client VM pod with 1500 MTU targets a host networked pod", func() { + var serverPod *v1.Pod + var serverPodNodeName string + var serverPodName string + var serverNode v1.Node + var clientNode v1.Node + + var clientPod *v1.Pod + var clientPodNodeName string + payload := fmt.Sprintf("%01420d", 1) + + ginkgo.BeforeEach(func() { + ginkgo.By("Selecting 2 schedulable nodes") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 1)) + + ginkgo.By("Selecting nodes for client pod and server host-networked pod") + serverPodNodeName = nodes.Items[0].Name + serverNode = nodes.Items[0] + clientPodNodeName = nodes.Items[1].Name + clientNode = nodes.Items[1] + + ginkgo.By(fmt.Sprintf("Creating ovnk client pod on node: %s", clientNode.Name)) + clientPod = e2epod.NewAgnhostPod(f.Namespace.Name, echoClientPodName, nil, nil, nil) + clientPod.Spec.NodeName = clientPodNodeName + for k := range clientPod.Spec.Containers { + if clientPod.Spec.Containers[k].Name == "agnhost-container" { + clientPod.Spec.Containers[k].Command = []string{ + "sleep", + "infinity", + } + } + clientPod.Spec.Containers[k].SecurityContext = &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Add: []v1.Capability{"NET_ADMIN"}, + }, + } + } + clientPod = e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) + + getPodIPWithRetry := func(clientSet clientset.Interface, v6 bool, namespace, name string) (net.IP, error) { + var srcPodIP net.IP + err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + pod, err := clientSet.CoreV1().Pods(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + return false, err + } + ips, err := util.DefaultNetworkPodIPs(pod) + if err != nil { + return false, err + } + srcPodIP, err = util.MatchFirstIPFamily(v6, ips) + if err != nil { + return false, err + } + return true, nil + }) + if err != nil || srcPodIP == nil { + return srcPodIP, fmt.Errorf("unable to fetch pod %s/%s IP after retrying: %v", namespace, name, err) + } + return srcPodIP, nil + } + + var clientPodIP net.IP + isV6 := IsIPv6Cluster(f.ClientSet) + clientPodIP, err = getPodIPWithRetry(f.ClientSet, isV6, f.Namespace.Name, clientPod.Name) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(clientPodIP)).To(gomega.BeNumerically(">", 0)) + framework.Logf("Client pod IP is %s", clientPodIP) + prefix := "/24" + if isV6 { + prefix = "/64" + } + clientPodCIDR := clientPodIP.String() + prefix + clientMAC, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", echoClientPodName, "--", "cat", "/sys/class/net/eth0/address") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + clientMAC = strings.TrimSpace(clientMAC) + dummyMAC := "0a:58:0a:13:13:17" + externalServer := "8.8.8.8" + if isV6 { + externalServer = "2001:4860:4860::8888" + } + routeCmd := append(ipCmd, "route", "get", externalServer) + fullCmd := append([]string{"exec", echoClientPodName, "--"}, routeCmd...) + routeOutput, err := e2ekubectl.RunKubectl(f.Namespace.Name, fullCmd...) 
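+ // The gateway is parsed positionally from the `ip route get` output below: IPv4 typically prints
+ // "<dst> via <gw> dev <iface> ...", putting the gateway at field index 2, while IPv6 output usually
+ // reads "<dst> from :: via <gw> ...", which shifts the gateway to field index 4.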
+ gomega.Expect(err).NotTo(gomega.HaveOccurred()) + parsedRouteOutput := strings.Fields(routeOutput) + gomega.Expect(len(parsedRouteOutput)).To(gomega.BeNumerically(">", 3)) + gw := parsedRouteOutput[2] + if isV6 { + gw = parsedRouteOutput[4] + } + + ginkgo.By("Setting up a VM with linux bridge and veth in the pod") + cmds := [][]string{ + {"ip", "link", "add", "name", "br0", "type", "bridge"}, + {"ip", "link", "set", "br0", "up"}, + {"ip", "link", "set", "br0", "mtu", "1500"}, + {"ip", "link", "add", "veth0", "type", "veth", "peer", "name", "veth1"}, + {"ip", "link", "set", "veth0", "mtu", "1500"}, + {"ip", "link", "set", "veth1", "mtu", "1500"}, + {"ip", "link", "set", "eth0", "master", "br0"}, + {"ip", "link", "set", "veth0", "master", "br0"}, + {"ip", "addr", "flush", "dev", "eth0"}, + {"ip", "link", "set", "dev", "veth1", "down"}, + {"ip", "link", "set", "dev", "eth0", "down"}, + {"ip", "link", "set", "dev", "veth1", "address", clientMAC}, + {"ip", "link", "set", "dev", "eth0", "address", dummyMAC}, + {"ip", "link", "set", "dev", "veth1", "up"}, + {"ip", "link", "set", "dev", "eth0", "up"}, + append(ipCmd, "addr", "add", clientPodCIDR, "dev", "veth1"), + append(ipCmd, "route", "add", "default", "via", gw), + {"ip", "link", "set", "dev", "veth0", "up"}, + } + for _, cmd := range cmds { + fullCmd := []string{"exec", echoClientPodName, "--"} + fullCmd = append(fullCmd, cmd...) + stdout, err := e2ekubectl.RunKubectl(f.Namespace.Name, fullCmd...) + framework.ExpectNoError(err, fmt.Sprintf("setting up linux bridge failed, output: %s", stdout)) + } + + ginkgo.By(fmt.Sprintf("Creating an host networked server pod on node: %s", serverNode.Name)) + serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) + serverPod = e2epod.NewAgnhostPod(f.Namespace.Name, serverPodName, nil, nil, nil, + "netexec", + "--http-port", fmt.Sprintf("%d", serverPodPort), + "--udp-port", fmt.Sprintf("%d", serverPodPort), + ) + serverPod.ObjectMeta.Labels = map[string]string{ + "app": serverPodName, + } + serverPod.Spec.NodeName = serverPodNodeName + serverPod.Spec.HostNetwork = true + serverPod = e2epod.NewPodClient(f).CreateSync(context.TODO(), serverPod) + }) + + ginkgo.It("should be able to send large TCP packet and not get a route cache entry", func() { + // Flushing the IP route cache will remove any routes in the cache + // that are a result of receiving a "need to frag" packet. + ginkgo.By("Flushing the ip route cache") + flushCmd := append(ipCmd, "route", "flush", "cache") + stdout, err := infraprovider.Get().ExecK8NodeCommand(serverNode.Name, flushCmd) + framework.ExpectNoError(err, "Flushing the ip route cache failed") + framework.Logf("Flushed cache on %s", serverNode.Name) + clientNodeIP := clientNode.Status.Addresses[0].Address + serverIP := serverNode.Status.Addresses[0].Address + // List the current IP route cache for informative purposes. 
+ routeCmd := append(ipCmd, "route", "get", clientNodeIP) + stdout, err = infraprovider.Get().ExecK8NodeCommand(serverNode.Name, routeCmd) + framework.ExpectNoError(err, "Listing IP route cache") + framework.Logf("%s: %s", routeCmd, stdout) + + curlDest := serverIP + isV6 := IsIPv6Cluster(f.ClientSet) + if isV6 { + curlDest = "[" + curlDest + "]" + } + ginkgo.By(fmt.Sprintf("Sending TCP large payload to server IP %s ", serverIP)) + cmd := fmt.Sprintf("curl --max-time 10 -g -q -s http://%s:%d/echo?msg=%s", + curlDest, + serverPodPort, + payload, + ) + // when the curl happens OVN will generate a needs frag towards the VM, and second curl should work + stdout, err = e2epodoutput.RunHostCmdWithRetries( + clientPod.Namespace, + clientPod.Name, + cmd, + framework.Poll, + 60*time.Second) + + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).NotTo(gomega.BeEmpty()) + ginkgo.By(fmt.Sprintf("Making sure that the ip route cache does not contain an MTU route on node: %s", serverNode.Name)) + // Get IP route cache and make sure that it contains an MTU route on the server side. + stdout, err = infraprovider.Get().ExecK8NodeCommand(serverNode.Name, routeCmd) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Route cache on server node %s", stdout) + if echoMtuRegex.Match([]byte(stdout)) { + ginkgo.Fail(fmt.Sprintf("Route cache has PMTUD value for client node IP: %s, output: %s", clientNodeIP, stdout)) + } + }) + }) + + ginkgo.When("an ovnk pod targets a host networked pod with large UDP", func() { + var serverPod *v1.Pod + var serverPodNodeName string + var serverPodName string + var serverNode v1.Node + var clientNode v1.Node + + var clientPod *v1.Pod + var clientPodNodeName string + payload := fmt.Sprintf("%01420d", 1) + + ginkgo.BeforeEach(func() { + ginkgo.By("Selecting 2 schedulable nodes") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 1)) + + ginkgo.By("Selecting nodes for client pod and server host-networked pod") + serverPodNodeName = nodes.Items[0].Name + serverNode = nodes.Items[0] + clientPodNodeName = nodes.Items[1].Name + clientNode = nodes.Items[1] + + ginkgo.By(fmt.Sprintf("Creating ovnk client pod on node: %s", clientNode.Name)) + clientPod = e2epod.NewAgnhostPod(f.Namespace.Name, echoClientPodName, nil, nil, nil) + clientPod.Spec.NodeName = clientPodNodeName + for k := range clientPod.Spec.Containers { + if clientPod.Spec.Containers[k].Name == "agnhost-container" { + clientPod.Spec.Containers[k].Command = []string{ + "sleep", + "infinity", + } + } + clientPod.Spec.Containers[k].SecurityContext = &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Add: []v1.Capability{"NET_ADMIN"}, + }, + } + } + clientPod = e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) + + ginkgo.By(fmt.Sprintf("Creating an host networked server pod on node: %s", serverNode.Name)) + serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) + serverPod = e2epod.NewAgnhostPod(f.Namespace.Name, serverPodName, nil, nil, nil, + "netexec", + "--http-port", fmt.Sprintf("%d", serverPodPort), + "--udp-port", fmt.Sprintf("%d", serverPodPort), + ) + serverPod.ObjectMeta.Labels = map[string]string{ + "app": serverPodName, + } + serverPod.Spec.NodeName = serverPodNodeName + serverPod.Spec.HostNetwork = true + serverPod = e2epod.NewPodClient(f).CreateSync(context.TODO(), serverPod) + + }) + + ginkgo.It("should 
be able to send large UDP packet and not get a route cache entry", func() { + // Flushing the IP route cache will remove any routes in the cache + // that are a result of receiving a "need to frag" packet. + ginkgo.By("Flushing the ip route cache") + flushCmd := append(ipCmd, "route", "flush", "cache") + stdout, err := infraprovider.Get().ExecK8NodeCommand(serverNode.Name, flushCmd) + framework.ExpectNoError(err, "Flushing the ip route cache failed") + framework.Logf("Flushed cache on %s", serverNode.Name) + clientNodeIP := clientNode.Status.Addresses[0].Address + // List the current IP route cache for informative purposes. + routeGetCmd := append(ipCmd, "route", "get", clientNodeIP) + stdout, err = infraprovider.Get().ExecK8NodeCommand(serverNode.Name, routeGetCmd) + framework.ExpectNoError(err, "Listing IP route cache") + framework.Logf("%s: %s", routeGetCmd, stdout) + serverIP := serverNode.Status.Addresses[0].Address + + ginkgo.By(fmt.Sprintf("Sending UDP large payload to server IP %s ", serverIP)) + // Send payload via UDP. + cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", + payload, + serverIP, + serverPodPort, + ) + stdout, err = e2epodoutput.RunHostCmd( + clientPod.Namespace, + clientPod.Name, + cmd) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.BeEmpty()) + ginkgo.By(fmt.Sprintf("Making sure that the ip route cache does not contain an MTU route on node: %s", serverNode.Name)) + // Get IP route cache and make sure that it does not contain an MTU cached route on the server side for client node. + stdout, err = infraprovider.Get().ExecK8NodeCommand(serverNode.Name, routeGetCmd) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + framework.Logf("Route cache on server node %s", stdout) + if echoMtuRegex.Match([]byte(stdout)) { + ginkgo.Fail(fmt.Sprintf("Route cache has PMTUD value for proxy IP: %s, output: %s", clientNodeIP, stdout)) + } + }) + }) +}) diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index 9b7fb939fe..bee77d639f 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -15,14 +15,13 @@ import ( apitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/types" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" udnclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilnet "k8s.io/utils/net" - - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/kubernetes/test/e2e/framework" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" e2enode "k8s.io/kubernetes/test/e2e/framework/node" @@ -30,6 +29,7 @@ import ( e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + utilnet "k8s.io/utils/net" ) var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is advertised", func() { @@ -37,7 +37,6 @@ var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is serverContainerName = "bgpserver" routerContainerName = "frr" echoClientPodName = "echo-client-pod" 
- primaryNetworkName = "kind" bgpExternalNetworkName = "bgpnet" ) var serverContainerIPs []string @@ -47,18 +46,23 @@ var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is ginkgo.BeforeEach(func() { serverContainerIPs = []string{} - - bgpServerIPv4, bgpServerIPv6 := getContainerAddressesForNetwork(serverContainerName, bgpExternalNetworkName) - if isIPv4Supported() { - serverContainerIPs = append(serverContainerIPs, bgpServerIPv4) + bgpNetwork, err := infraprovider.Get().GetNetwork(bgpExternalNetworkName) + framework.ExpectNoError(err, "must get bgpnet network") + bgpServer := infraapi.ExternalContainer{Name: serverContainerName} + networkInterface, err := infraprovider.Get().GetExternalContainerNetworkInterface(bgpServer, bgpNetwork) + framework.ExpectNoError(err, "container %s attached to network %s must contain network info", serverContainerName, bgpExternalNetworkName) + if isIPv4Supported() && len(networkInterface.IPv4) > 0 { + serverContainerIPs = append(serverContainerIPs, networkInterface.IPv4) } - - if isIPv6Supported() { - serverContainerIPs = append(serverContainerIPs, bgpServerIPv6) + if isIPv6Supported() && len(networkInterface.IPv6) > 0 { + serverContainerIPs = append(serverContainerIPs, networkInterface.IPv6) } framework.Logf("The external server IPs are: %+v", serverContainerIPs) - - frrContainerIPv4, frrContainerIPv6 = getContainerAddressesForNetwork(routerContainerName, primaryNetworkName) + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "provider primary network must be available") + externalContainerNetInf, err := infraprovider.Get().GetExternalContainerNetworkInterface(infraapi.ExternalContainer{Name: routerContainerName}, providerPrimaryNetwork) + framework.ExpectNoError(err, "external container %s network %s information must be available", routerContainerName, providerPrimaryNetwork.Name()) + frrContainerIPv4, frrContainerIPv6 = externalContainerNetInf.IPv4, externalContainerNetInf.IPv6 framework.Logf("The frr router container IPs are: %s/%s", frrContainerIPv4, frrContainerIPv6) }) @@ -109,16 +113,17 @@ var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is // This test ensures the north-south connectivity is happening through podIP ginkgo.It("tests are run towards the external agnhost echo server", func() { ginkgo.By("routes from external bgp server are imported by nodes in the cluster") - externalServerV4CIDR, externalServerV6CIDR := getContainerNetworkCIDRs(bgpExternalNetworkName) + bgpNetwork, err := infraprovider.Get().GetNetwork(bgpExternalNetworkName) + framework.ExpectNoError(err, "network %s must be available and precreated before test run", bgpExternalNetworkName) + externalServerV4CIDR, externalServerV6CIDR, err := bgpNetwork.IPv4IPv6Subnets() + framework.ExpectNoError(err, "must get bgpnet subnets") framework.Logf("the network cidrs to be imported are v4=%s and v6=%s", externalServerV4CIDR, externalServerV6CIDR) for _, node := range nodes.Items { ipVer := "" - cmd := []string{containerRuntime, "exec", node.Name} bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, externalServerV4CIDR), " ") - cmd = append(cmd, bgpRouteCommand...) framework.Logf("Checking for server's route in node %s", node.Name) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) 
+ routes, err := infraprovider.Get().ExecK8NodeCommand(node.GetName(), bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from node") framework.Logf("Routes in node %s", routes) return strings.Contains(routes, frrContainerIPv4) @@ -127,12 +132,10 @@ var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is ipVer = " -6" nodeIPv6LLA, err := GetNodeIPv6LinkLocalAddressForEth0(routerContainerName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - cmd := []string{containerRuntime, "exec", node.Name} bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, externalServerV6CIDR), " ") - cmd = append(cmd, bgpRouteCommand...) framework.Logf("Checking for server's route in node %s", node.Name) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecK8NodeCommand(node.GetName(), bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from node") framework.Logf("Routes in node %s", routes) return strings.Contains(routes, nodeIPv6LLA) @@ -183,12 +186,11 @@ var _ = ginkgo.Describe("BGP: Pod to external server when default podNetwork is } gomega.Expect(len(nodeIP)).To(gomega.BeNumerically(">", 0)) framework.Logf("the nodeIP for node %s is %+v", node.Name, nodeIP) - cmd := []string{containerRuntime, "exec", routerContainerName} + externalContainer := infraapi.ExternalContainer{Name: routerContainerName} bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, podCIDR), " ") - cmd = append(cmd, bgpRouteCommand...) framework.Logf("Checking for node %s's route for pod subnet %s", node.Name, podCIDR) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) return strings.Contains(routes, nodeIP[0]) @@ -236,7 +238,6 @@ var _ = ginkgo.Describe("BGP: Pod to external server when CUDN network is advert serverContainerName = "bgpserver" routerContainerName = "frr" echoClientPodName = "echo-client-pod" - primaryNetworkName = "kind" bgpExternalNetworkName = "bgpnet" placeholder = "PLACEHOLDER_NAMESPACE" ) @@ -259,17 +260,25 @@ var _ = ginkgo.Describe("BGP: Pod to external server when CUDN network is advert serverContainerIPs = []string{} - bgpServerIPv4, bgpServerIPv6 := getContainerAddressesForNetwork(serverContainerName, bgpExternalNetworkName) - if isIPv4Supported() { - serverContainerIPs = append(serverContainerIPs, bgpServerIPv4) + bgpNetwork, err := infraprovider.Get().GetNetwork(bgpExternalNetworkName) // pre-created network + framework.ExpectNoError(err, "must get bgpnet network") + bgpServer := infraapi.ExternalContainer{Name: serverContainerName} + networkInterface, err := infraprovider.Get().GetExternalContainerNetworkInterface(bgpServer, bgpNetwork) + framework.ExpectNoError(err, "container %s attached to network %s must contain network info", serverContainerName, bgpExternalNetworkName) + if isIPv4Supported() && len(networkInterface.IPv4) > 0 { + serverContainerIPs = append(serverContainerIPs, networkInterface.IPv4) } - - if isIPv6Supported() { - serverContainerIPs = append(serverContainerIPs, bgpServerIPv6) + if isIPv6Supported() && len(networkInterface.IPv6) > 0 { + serverContainerIPs = append(serverContainerIPs, networkInterface.IPv6) } + gomega.Expect(len(serverContainerIPs)).Should(gomega.BeNumerically(">", 0), "failed to 
find external container IPs") framework.Logf("The external server IPs are: %+v", serverContainerIPs) - - frrContainerIPv4, frrContainerIPv6 = getContainerAddressesForNetwork(routerContainerName, primaryNetworkName) + providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "provider primary network must be available") + frrContainer := infraapi.ExternalContainer{Name: routerContainerName} + networkInterface, err = infraprovider.Get().GetExternalContainerNetworkInterface(frrContainer, providerPrimaryNetwork) + framework.ExpectNoError(err, "container %s attached to network %s must contain network info", routerContainerName, providerPrimaryNetwork.Name()) + frrContainerIPv4, frrContainerIPv6 = networkInterface.IPv4, networkInterface.IPv6 framework.Logf("The frr router container IPs are: %s/%s", frrContainerIPv4, frrContainerIPv6) // Select nodes here so they're available for all tests @@ -357,16 +366,17 @@ var _ = ginkgo.Describe("BGP: Pod to external server when CUDN network is advert // Advertisement will curl the external server container sitting outside the cluster via a FRR router // This test ensures the north-south connectivity is happening through podIP ginkgo.By("routes from external bgp server are imported by nodes in the cluster") - externalServerV4CIDR, externalServerV6CIDR := getContainerNetworkCIDRs(bgpExternalNetworkName) + bgpNetwork, err := infraprovider.Get().GetNetwork(bgpExternalNetworkName) + framework.ExpectNoError(err, "network %s must be available", bgpExternalNetworkName) + externalServerV4CIDR, externalServerV6CIDR, err := bgpNetwork.IPv4IPv6Subnets() + framework.ExpectNoError(err, "must get BGP network subnets") framework.Logf("the network cidrs to be imported are v4=%s and v6=%s", externalServerV4CIDR, externalServerV6CIDR) for _, node := range nodes.Items { ipVer := "" - cmd := []string{containerRuntime, "exec", node.Name} bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, externalServerV4CIDR), " ") - cmd = append(cmd, bgpRouteCommand...) framework.Logf("Checking for server's route in node %s", node.Name) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecK8NodeCommand(node.GetName(), bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from node") framework.Logf("Routes in node %s", routes) return strings.Contains(routes, frrContainerIPv4) @@ -375,12 +385,10 @@ var _ = ginkgo.Describe("BGP: Pod to external server when CUDN network is advert ipVer = " -6" nodeIPv6LLA, err := GetNodeIPv6LinkLocalAddressForEth0(routerContainerName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - cmd := []string{containerRuntime, "exec", node.Name} bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, externalServerV6CIDR), " ") - cmd = append(cmd, bgpRouteCommand...) framework.Logf("Checking for server's route in node %s", node.Name) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) 
+ routes, err := infraprovider.Get().ExecK8NodeCommand(node.GetName(), bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from node") framework.Logf("Routes in node %s", routes) return strings.Contains(routes, nodeIPv6LLA) @@ -525,10 +533,10 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork var ra *rav1.RouteAdvertisements - if cudnATemplate.Spec.Network.Topology == types.Layer2Topology && isLocalGWModeEnabled() { - e2eskipper.Skipf("Advertising Layer2 UDNs is not currently supported in LGW") - } ginkgo.BeforeEach(func() { + if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 && isLocalGWModeEnabled() { + e2eskipper.Skipf("Advertising Layer2 UDNs is not currently supported in LGW") + } ginkgo.By("Configuring primary UDN namespaces") var err error udnNamespaceA, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ @@ -666,6 +674,9 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" }) ginkgo.AfterEach(func() { + if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 && isLocalGWModeEnabled() { + return + } gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) @@ -728,9 +739,7 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" } } else { framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) - nodeCmd := []string{containerRuntime, "exec", clientName} - nodeCmd = append(nodeCmd, curlCmd...) - out, err = runCommand(nodeCmd...) + out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) if err != nil { // out is empty on error and error contains out... 
return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) @@ -803,28 +812,47 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" framework.ExpectNoError(err) return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", clientPodStatus.IPs[ipFamilyIndex].IP.String(), false }), - // TODO: Enable the following tests once pod-pod on different advertised networks isolation is addressed - //ginkgo.Entry("pod to pod on different networks and same node should not work", - // func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // // podsNetA[2] and podNetB are on the same node - // clientPod := podsNetA[2] - // srvPod := podNetB - // - // srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - // framework.ExpectNoError(err) - // return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true - // }), - - //ginkgo.Entry("pod to pod on different networks and different nodes should not work", - // func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // // podsNetA[0] and podNetB are on different nodes - // clientPod := podsNetA[0] - // srvPod := podNetB - // - // srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - // framework.ExpectNoError(err) - // return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true - // }), + ginkgo.Entry("pod to pod on different networks and same node should not work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[2] and podNetB are on the same node + clientPod := podsNetA[2] + srvPod := podNetB + + srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true + }), + + ginkgo.Entry("pod to pod on different networks and different nodes should not work", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podNetB are on different nodes + clientPod := podsNetA[0] + srvPod := podNetB + + srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podNetB are on the same node + clientPod := podNetDefault + srvPod := podNetB + + srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + 
return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", + func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podsNetA[0] are on different nodes + clientPod := podNetDefault + srvPod := podsNetA[0] + + srvPodStatus, err := userDefinedNetworkStatus(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(srvPodStatus.IPs[ipFamilyIndex].IP.String(), "8080") + "/clientip", curlConnectionTimeoutCode, true + }), ginkgo.Entry("pod in the default network should not be able to access a UDN service", func(ipFamilyIndex int) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(svcNetA.Spec.ClusterIPs[ipFamilyIndex], "8080") + "/clientip", curlConnectionTimeoutCode, true diff --git a/test/e2e/service.go b/test/e2e/service.go index 48fa65d63d..664a01e8ea 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -3,10 +3,12 @@ package e2e import ( "context" "encoding/json" + "errors" "fmt" "math/rand" "net" "os" + "os/exec" "regexp" "strconv" "strings" @@ -14,6 +16,11 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -48,14 +55,9 @@ var ( var _ = ginkgo.Describe("Services", func() { const ( serviceName = "testservice" - ovnWorkerNode = "ovn-worker" echoServerPodNameTemplate = "echo-server-pod-%d" echoClientPodName = "echo-client-pod" - echoServiceNameTemplate = "echo-service-%d" - echoServerPodPortMin = 9800 - echoServerPodPortMax = 9899 - echoServicePortMin = 31200 - echoServicePortMax = 31299 + echoServiceName = "echo-service" ) f := wrappedTestFramework("services") @@ -71,9 +73,6 @@ var _ = ginkgo.Describe("Services", func() { cleanupFn() }) - udpPort := int32(rand.Intn(1000) + 10000) - udpPortS := fmt.Sprintf("%d", udpPort) - ginkgo.It("Allow connection to an external IP using a source port that is equal to a node port", func() { const ( nodePort = 31990 @@ -109,13 +108,14 @@ var _ = ginkgo.Describe("Services", func() { jig := e2eservice.NewTestJig(cs, namespace, serviceName) ginkgo.By("Creating a ClusterIP service") + targetPort := infraprovider.Get().GetK8HostPort() service, err := jig.CreateUDPService(context.TODO(), func(s *v1.Service) { s.Spec.Ports = []v1.ServicePort{ { Name: "udp", Protocol: v1.ProtocolUDP, Port: 80, - TargetPort: intstr.FromInt(int(udpPort)), + TargetPort: intstr.FromInt(int(targetPort)), }, } }) @@ -123,8 +123,8 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("creating a host-network backend pod") - serverPod := e2epod.NewAgnhostPod(namespace, "backend", nil, nil, []v1.ContainerPort{{ContainerPort: (udpPort)}, {ContainerPort: (udpPort), Protocol: "UDP"}}, - "netexec", "--udp-port="+udpPortS) + serverPod := e2epod.NewAgnhostPod(namespace, "backend", 
nil, nil, []v1.ContainerPort{{ContainerPort: (int32(targetPort))}, {ContainerPort: (int32(targetPort)), Protocol: "UDP"}}, + "netexec", fmt.Sprintf("--udp-port=%d", targetPort)) serverPod.Labels = jig.Labels serverPod.Spec.HostNetwork = true @@ -133,7 +133,7 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Connecting to the service from another host-network pod on node " + nodeName) // find the ovn-kube node pod on this node - pods, err := cs.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + pods, err := cs.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + nodeName, }) @@ -163,10 +163,11 @@ var _ = ginkgo.Describe("Services", func() { var createdPods []*v1.Pod maxContainerFailures := 0 replicas := 3 + targetPort := infraprovider.Get().GetK8HostPort() config := testutils.RCConfig{ Client: cs, Image: imageutils.GetE2EImage(imageutils.Agnhost), - Command: []string{"/agnhost", "serve-hostname"}, + Command: []string{"/agnhost", "serve-hostname", fmt.Sprintf("--port=%d", targetPort)}, Name: "backend", Labels: jig.Labels, Namespace: namespace, @@ -186,7 +187,7 @@ var _ = ginkgo.Describe("Services", func() { s.Spec.Ports = []v1.ServicePort{{ Port: int32(servicePort), // agnhost serve-hostname port - TargetPort: intstr.FromInt32(9376), + TargetPort: intstr.FromInt(int(targetPort)), Protocol: v1.ProtocolTCP, }} }) @@ -229,13 +230,11 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.When(fmt.Sprintf("a nodePort service targeting a pod with hostNetwork:%t is created", hostNetwork), func() { var serverPod *v1.Pod var serverPodNodeName string - var serverPodPort int var serverPodName string - var svc v1.Service + var svc *v1.Service var serviceNode v1.Node var serviceNodeInternalIPs []string - var servicePort int var clientPod *v1.Pod var clientPodNodeName string @@ -245,6 +244,8 @@ var _ = ginkgo.Describe("Services", func() { "large": fmt.Sprintf("%01420d", 1), } var echoMtuRegex = regexp.MustCompile(`cache expires.*mtu.*`) + tcpPortName := "tcp-port" + udpPortName := "udp-port" ginkgo.BeforeEach(func() { ginkgo.By("Selecting 3 schedulable nodes") @@ -280,15 +281,16 @@ var _ = ginkgo.Describe("Services", func() { // Create the server pod. // Wait for 1 minute and if the pod does not come up, select a different port and try again. // Wait for a max of 5 minutes. 
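The retry-on-random-port loop described above is being replaced by a central allocation call, infraprovider.Get().GetK8HostPort(), whose implementation is not part of this hunk. As a rough illustration of the idea only (assuming nothing about the real provider code), a minimal allocator that hands out unique host ports from a fixed range could look like this:

package portalloc

import (
	"fmt"
	"sync"
)

// hostPortAllocator hands out unique host ports from a fixed range so that
// callers no longer need a "pick a random port and retry on conflict" loop.
// This is only a sketch of the idea behind GetK8HostPort(); the real
// infraprovider implementation is not shown in this diff.
type hostPortAllocator struct {
	mu   sync.Mutex
	next uint16
	max  uint16
}

func newHostPortAllocator(min, max uint16) *hostPortAllocator {
	return &hostPortAllocator{next: min, max: max}
}

// Next returns the next unused port, or an error once the range is exhausted.
func (a *hostPortAllocator) Next() (uint16, error) {
	a.mu.Lock()
	defer a.mu.Unlock()
	if a.next > a.max {
		return 0, fmt.Errorf("host port range exhausted")
	}
	p := a.next
	a.next++
	return p, nil
}

Centralizing allocation is what lets the test drop the per-test random port ranges and the collision-driven retries.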
+ serverPodPortTCP := infraprovider.Get().GetK8HostPort() // maybe a host net or cluster net pod but select host port anyway + serverPodPortUDP := infraprovider.Get().GetK8HostPort() // maybe a host net or cluster net pod but select host port anyway gomega.Eventually(func() error { - serverPodPort = rand.Intn(echoServerPodPortMax-echoServerPodPortMin) + echoServerPodPortMin - serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPort) - framework.Logf("Creating server pod listening on TCP and UDP port %d", serverPodPort) + serverPodName = fmt.Sprintf(echoServerPodNameTemplate, serverPodPortTCP) + framework.Logf("Creating server pod listening on TCP and UDP port %d", serverPodPortTCP) serverPod = e2epod.NewAgnhostPod(f.Namespace.Name, serverPodName, nil, nil, nil, "netexec", "--http-port", - fmt.Sprintf("%d", serverPodPort), + fmt.Sprintf("%d", serverPodPortTCP), "--udp-port", - fmt.Sprintf("%d", serverPodPort)) + fmt.Sprintf("%d", serverPodPortUDP)) serverPod.ObjectMeta.Labels = map[string]string{ "app": serverPodName, } @@ -310,30 +312,36 @@ var _ = ginkgo.Describe("Services", func() { // If the servicePorts are already in use, creating the service should fail and we should choose another // random port. gomega.Eventually(func() error { - servicePort = rand.Intn(echoServicePortMax-echoServicePortMin) + echoServicePortMin - framework.Logf("Creating the nodePort service listening on TCP and UDP port %d and targeting pod port %d", - servicePort, serverPodPort) - svc = v1.Service{ - ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf(echoServiceNameTemplate, servicePort)}, + servicePortTCP := rand.Intn(32767-30000) + 30000 + servicePortUDP := rand.Intn(32767-30000) + 30000 + + framework.Logf("Creating the nodePort service") + svc = &v1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: echoServiceName}, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ { - Name: "tcp-port", - NodePort: int32(servicePort), - Port: int32(serverPodPort), - Protocol: v1.ProtocolTCP, + Name: tcpPortName, + NodePort: int32(servicePortTCP), + Port: int32(serverPodPortTCP), + TargetPort: intstr.FromInt(int(serverPodPortTCP)), + Protocol: v1.ProtocolTCP, }, { - Name: "udp-port", - NodePort: int32(servicePort), - Port: int32(serverPodPort), - Protocol: v1.ProtocolUDP, + Name: udpPortName, + NodePort: int32(servicePortUDP), + Port: int32(serverPodPortUDP), + TargetPort: intstr.FromInt(int(serverPodPortUDP)), + Protocol: v1.ProtocolUDP, }, }, Selector: map[string]string{"app": serverPodName}, Type: v1.ServiceTypeNodePort}, } - _, err := f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.TODO(), &svc, metav1.CreateOptions{}) + svc, err = f.ClientSet.CoreV1().Services(f.Namespace.Name).Create(context.TODO(), svc, metav1.CreateOptions{}) + if err != nil { + framework.Logf("creating service failed, err: %v", err) + } return err }, 60*time.Second, 1*time.Second).Should(gomega.Succeed()) }) @@ -342,18 +350,29 @@ var _ = ginkgo.Describe("Services", func() { // a large (1420 bytes + overhead for echo service) payload. // The payload is transmitted to and echoed from the echo service for both HTTP and UDP tests. 
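The hunks below move from a single shared servicePort to distinct TCP and UDP NodePorts and read them back with nodePortsFromService(svc). That helper is defined elsewhere in the suite and is not shown here; a sketch of what it is assumed to do, matching ports by protocol rather than by index, is:

package e2e

import (
	v1 "k8s.io/api/core/v1"
)

// nodePortsByProtocol returns the TCP and UDP NodePorts of a Service,
// selecting ports by protocol. Illustrative stand-in for the
// nodePortsFromService helper used in the tests below.
func nodePortsByProtocol(svc *v1.Service) (tcp, udp int32) {
	for _, p := range svc.Spec.Ports {
		switch p.Protocol {
		case v1.ProtocolTCP:
			tcp = p.NodePort
		case v1.ProtocolUDP:
			udp = p.NodePort
		}
	}
	return tcp, udp
}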
ginkgo.When("tests are run towards the agnhost echo service", func() { + ginkgo.It("queries to the nodePort service shall work for TCP", func() { - for _, size := range []string{"small", "large"} { + tcpNodePort, _ := nodePortsFromService(svc) + packetSizes := []string{"small", "large"} + if isLocalGWModeEnabled() && hostNetwork { + // if local gateway mode the intermediary node will attempt to fragment the packet, if the DF + // bit is not set. However, the decision on setting DF bit is left up to the kernel, and + // is unpredictable. If the DF bit is set, the iptables rule that DNATs nodeport -> cluster IP + // will then attempt to route the packet, and hit our 1400 byte MTU route. This will cause: + // 172.18.0.2:37755->10.96.141.254:9881(udp) sk_skb_reason_drop(SKB_DROP_REASON_PKT_TOO_BIG) + packetSizes = []string{"small"} + } + for _, size := range packetSizes { for _, serviceNodeIP := range serviceNodeInternalIPs { - targetIP := serviceNodeIP + serviceNodeIP := serviceNodeIP if IsIPv6Cluster(f.ClientSet) { - targetIP = fmt.Sprintf("[%s]", targetIP) + serviceNodeIP = fmt.Sprintf("[%s]", serviceNodeIP) } ginkgo.By(fmt.Sprintf("Sending TCP %s payload to service IP %s "+ "and expecting to receive the same payload", size, serviceNodeIP)) cmd := fmt.Sprintf("curl --max-time 10 -g -q -s http://%s:%d/echo?msg=%s", - targetIP, - servicePort, + serviceNodeIP, + tcpNodePort, echoPayloads[size], ) framework.Logf("Testing TCP %s with command %q", size, cmd) @@ -395,13 +414,32 @@ var _ = ginkgo.Describe("Services", func() { // 0.0.0.0/0 172.18.0.1 dst-ip rtoe-GR_ovn-worker // This time, the packet will leave the rtoj port and it will be fragmented. ginkgo.It("queries to the nodePort service shall work for UDP", func() { - for _, size := range []string{"small", "large"} { + packetSizes := []string{"small", "large"} + // If gateway mode is shared, and endpoint is OVN networked, host networked originated packets + // exceeding pod MTU will not be delivered. This is because ICMP needs frag will be sent back to the original + // Kubernetes node by OVN (even if DF bit is not set) and the node will refuse to install an MTU cache route. + // To fix this later we can install ip rules that match on nodeport and lower the MTU from the originator + // but for now we consider nodeport access from a k8s node as not a practical use case. See + // https://issues.redhat.com/browse/OCPBUGS-7609 + // Furthermore, in local gateway mode, if the DF bit was not set the packet will go into the host of + // intermediary node, where nodeport will be DNAT'ed to cluster IP service, and then hit the MTU 1400 route. + // Netcat will not set Don't Fragment (DF) bit, so packet will be fragmented at intermediary + // node and sent to server. However, it is up to the kernel to decide whether to set the DF bit, + // and it is not predictable. Therefore, we have to skip large packet size for local gateway mode + // as well. 
This is true when the endpoint is host or ovn networked, because the route for the cluster + // cidr service is set to 1400, which causes: + // 172.18.0.2:37755->10.96.141.254:9881(udp) sk_skb_reason_drop(SKB_DROP_REASON_PKT_TOO_BIG) + if !hostNetwork || isLocalGWModeEnabled() { + packetSizes = []string{"small"} + } + + for _, size := range packetSizes { for _, serviceNodeIP := range serviceNodeInternalIPs { flushCmd := "ip route flush cache" if utilnet.IsIPv6String(serviceNodeIP) { flushCmd = "ip -6 route flush cache" } - if size == "large" && !hostNetwork { + if size == "large" { // Flushing the IP route cache will remove any routes in the cache // that are a result of receiving a "need to frag" packet. ginkgo.By("Flushing the ip route cache") @@ -422,10 +460,10 @@ var _ = ginkgo.Describe("Services", func() { framework.ExpectNoError(err, "Listing IP route cache") framework.Logf("%s: %s", cmd, stdout) } - // We expect the following to fail at least once for large payloads and non-hostNetwork // endpoints: the first request will fail as we have to receive a "need to frag" ICMP // message, subsequent requests then should succeed. + _, udpNodePort := nodePortsFromService(svc) gomega.Eventually(func() error { ginkgo.By(fmt.Sprintf("Sending UDP %s payload to service IP %s "+ "and expecting to receive the same payload", size, serviceNodeIP)) @@ -433,7 +471,7 @@ var _ = ginkgo.Describe("Services", func() { cmd := fmt.Sprintf("echo 'echo %s' | nc -w2 -u %s %d", echoPayloads[size], serviceNodeIP, - servicePort, + udpNodePort, ) framework.Logf("Testing UDP %s with command %q", size, cmd) stdout, err := e2epodoutput.RunHostCmd( @@ -462,23 +500,9 @@ var _ = ginkgo.Describe("Services", func() { if err != nil { return fmt.Errorf("could not list IP route cache, err: %q", err) } - if !hostNetwork || isLocalGWModeEnabled() { - // with local gateway mode the packet will be sent: - // client -> intermediary node -> server - // With local gw mode, the packet will go into the host of intermediary node, where - // nodeport will be DNAT'ed to cluster IP service, and then hit the MTU 1400 route - // and trigger ICMP needs frag. - // MTU 1400 should be removed after bumping to OVS with https://bugzilla.redhat.com/show_bug.cgi?id=2170920 - // fixed. - ginkgo.By("Making sure that the ip route cache contains an MTU route") - if !echoMtuRegex.Match([]byte(stdout)) { - return fmt.Errorf("cannot find MTU cache entry in route: %s", stdout) - } - } else { - ginkgo.By("Making sure that the ip route cache does NOT contain an MTU route") - if echoMtuRegex.Match([]byte(stdout)) { - framework.Failf("found unexpected MTU cache route: %s", stdout) - } + ginkgo.By("Making sure that the ip route cache does NOT contain an MTU route") + if echoMtuRegex.Match([]byte(stdout)) { + framework.Failf("found unexpected MTU cache route: %s", stdout) } } return nil @@ -488,7 +512,8 @@ var _ = ginkgo.Describe("Services", func() { // flush this on all 3 nodes else we will run into the // bug: https://issues.redhat.com/browse/OCPBUGS-7609. // TODO: Revisit this once https://bugzilla.redhat.com/show_bug.cgi?id=2169839 is fixed. 
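The route-cache assertions rely on echoMtuRegex (`cache expires.*mtu.*`) to detect a PMTU entry learned from an ICMP "fragmentation needed" / ICMPv6 "packet too big" message. A standalone sketch of that check, with an illustrative (not captured) cache line, is:

package e2e

import (
	"fmt"
	"regexp"
)

// mtuCacheRegex matches kernel route-cache entries created after a PMTU
// exception, e.g. (addresses and interface are illustrative only):
//   10.96.141.254 via 172.18.0.1 dev breth0  cache expires 598sec mtu 1400
var mtuCacheRegex = regexp.MustCompile(`cache expires.*mtu.*`)

// hasMTUCacheRoute reports whether "ip route show cache <dst>" output
// contains such an entry; the test below uses the same pattern to assert
// that no MTU route was learned for the large-payload UDP case.
func hasMTUCacheRoute(ipRouteShowCacheOutput string) bool {
	return mtuCacheRegex.MatchString(ipRouteShowCacheOutput)
}

func ExampleHasMTUCacheRoute() {
	out := "10.96.141.254 via 172.18.0.1 dev breth0  cache expires 598sec mtu 1400"
	fmt.Println(hasMTUCacheRoute(out))
	// Output: true
}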
- ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + ovnKubernetesNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubernetesNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "name=ovnkube-node", }) if err != nil { @@ -500,11 +525,8 @@ var _ = ginkgo.Describe("Services", func() { if isInterconnectEnabled() { containerName = "ovnkube-controller" } - - arguments := []string{"exec", ovnKubeNodePod.Name, "--container", containerName, "--"} - sepFlush := strings.Split(flushCmd, " ") - arguments = append(arguments, sepFlush...) - _, err := e2ekubectl.RunKubectl(ovnNamespace, arguments...) + _, err := e2ekubectl.RunKubectl(ovnKubernetesNamespace, "exec", ovnKubeNodePod.Name, "--container", containerName, "--", + "ip", "route", "flush", "cache") framework.ExpectNoError(err, "Flushing the ip route cache failed") } } @@ -524,18 +546,20 @@ var _ = ginkgo.Describe("Services", func() { ) nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, e2eservice.MaxNodesForEndpointsTests) framework.ExpectNoError(err) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 0)) v4NodeAddrs := e2enode.FirstAddressByTypeAndFamily(nodes, v1.NodeInternalIP, v1.IPv4Protocol) v6NodeAddrs := e2enode.FirstAddressByTypeAndFamily(nodes, v1.NodeInternalIP, v1.IPv6Protocol) if v4NodeAddrs == "" && v6NodeAddrs == "" { framework.Failf("unable to detect if cluster supports IPv4 or IPv6") } + nodeName := nodes.Items[0].Name getIPRouteGetOutput := func(dst string) string { - cmd := []string{containerRuntime, "exec", ovnWorkerNode, "ip"} + cmd := []string{"ip"} if utilnet.IsIPv6String(dst) { cmd = append(cmd, "-6") } cmd = append(cmd, "route", "get", dst) - output, err := runCommand(cmd...) 
+ output, err := infraprovider.Get().ExecK8NodeCommand(nodeName, cmd) framework.ExpectNoError(err, fmt.Sprintf("failed to exec '%v': %v", cmd, err)) return output } @@ -566,7 +590,7 @@ var _ = ginkgo.Describe("Services", func() { framework.ExpectNoError(err) node := nodes.Items[0] nodeName := node.Name - pods, err := cs.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + pods, err := cs.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + nodeName, }) @@ -577,13 +601,14 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Using node" + nodeName + " and pod " + clientPod.Name) ginkgo.By("Creating an empty ClusterIP service") + udpHostNsPort := infraprovider.Get().GetK8HostPort() service, err := jig.CreateUDPService(context.TODO(), func(s *v1.Service) { s.Spec.Ports = []v1.ServicePort{ { Name: "udp", Protocol: v1.ProtocolUDP, Port: 80, - TargetPort: intstr.FromInt(int(udpPort)), + TargetPort: intstr.FromInt(int(udpHostNsPort)), }, } @@ -606,7 +631,7 @@ var _ = ginkgo.Describe("Services", func() { framework.ExpectNoError(err) cleanupFn = func() { // initial pod used for host command may be deleted at this point, refetch - pods, err := cs.CoreV1().Pods(ovnNamespace).List(context.TODO(), metav1.ListOptions{ + pods, err := cs.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", FieldSelector: "spec.nodeName=" + nodeName, }) @@ -620,8 +645,8 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Starting a UDP server listening on the additional IP") // now that 2.2.2.2 exists on the node's lo interface, let's start a server listening on it // we use UDP here since agnhost lets us pick the listen address only for UDP - serverPod := e2epod.NewAgnhostPod(namespace, "backend", nil, nil, []v1.ContainerPort{{ContainerPort: (udpPort)}, {ContainerPort: (udpPort), Protocol: "UDP"}}, - "netexec", "--udp-port="+udpPortS, "--udp-listen-addresses="+extraIP) + serverPod := e2epod.NewAgnhostPod(namespace, "backend", nil, nil, []v1.ContainerPort{{ContainerPort: int32(udpHostNsPort)}, {ContainerPort: int32(udpHostNsPort), Protocol: "UDP"}}, + "netexec", "--udp-port="+fmt.Sprintf("%d", udpHostNsPort), "--udp-listen-addresses="+extraIP) serverPod.Labels = jig.Labels serverPod.Spec.NodeName = nodeName serverPod.Spec.HostNetwork = true @@ -633,7 +658,7 @@ var _ = ginkgo.Describe("Services", func() { // sanity check that we're actually listening on this IP err = wait.PollImmediate(framework.Poll, 30*time.Second, func() (bool, error) { cmd = fmt.Sprintf(`echo hostname | /usr/bin/socat -t 5 - "udp:%s"`, - net.JoinHostPort(extraIP, udpPortS)) + net.JoinHostPort(extraIP, fmt.Sprintf("%d", udpHostNsPort))) stdout, err := e2epodoutput.RunHostCmdWithRetries(clientPod.Namespace, clientPod.Name, cmd, framework.Poll, 30*time.Second) if err != nil { return false, err @@ -656,7 +681,7 @@ var _ = ginkgo.Describe("Services", func() { Ports: []v1.EndpointPort{ { Name: "udp", - Port: udpPort, + Port: int32(udpHostNsPort), Protocol: "UDP", }, }, @@ -681,7 +706,7 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Confirming that the service is accessible from the node's pod network") // Now, spin up a pod-network pod on the same node, and ensure we can talk to the "local address" service - clientServerPod := e2epod.NewAgnhostPod(namespace, "client", nil, nil, []v1.ContainerPort{{ContainerPort: 
(udpPort)}, {ContainerPort: (udpPort), Protocol: "UDP"}}, + clientServerPod := e2epod.NewAgnhostPod(namespace, "client", nil, nil, []v1.ContainerPort{{ContainerPort: 12345}, {ContainerPort: 12345, Protocol: "UDP"}}, "netexec") clientServerPod.Spec.NodeName = nodeName e2epod.NewPodClient(f).CreateSync(context.TODO(), clientServerPod) @@ -710,27 +735,24 @@ var _ = ginkgo.Describe("Services", func() { var err error nodeIPs := make(map[string]map[int]string) var egressNode string - var targetSecondaryNode node + var providerCtx infraapi.Context const ( - endpointHTTPPort = 80 - endpointUDPPort = 90 - clusterHTTPPort = 81 - clusterUDPPort = 91 - clientContainerName = "npclient" + endpointHTTPPort = 80 + endpointUDPPort = 90 + clusterHTTPPort = 81 + clusterUDPPort = 91 + clientContainerName = "npclient" + targetSecondaryContainerName = "egressSecondaryTargetNode-allowed" ) ginkgo.BeforeEach(func() { + providerCtx = infraprovider.Get().NewTestContext() nodeIPs = make(map[string]map[int]string) egressNode = "" - targetSecondaryNode = node{ - name: "egressSecondaryTargetNode-allowed", - } }) ginkgo.AfterEach(func() { - ginkgo.By("Cleaning up external container") - deleteClusterExternalContainer(clientContainerName) ginkgo.By("Deleting additional IP addresses from nodes") for nodeName, ipFamilies := range nodeIPs { for _, ip := range ipFamilies { @@ -738,27 +760,26 @@ var _ = ginkgo.Describe("Services", func() { if utilnet.IsIPv6String(ip) { subnetMask = "/128" } - _, err := runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "delete", - fmt.Sprintf("%s%s", ip, subnetMask), "dev", "breth0") - if err != nil && !strings.Contains(err.Error(), - "RTNETLINK answers: Cannot assign requested address") { + _, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "addr", "delete", + fmt.Sprintf("%s/%s", ip, subnetMask), "dev", deploymentconfig.Get().ExternalBridgeName()}) + if err != nil && !(strings.Contains(err.Error(), + "RTNETLINK answers: Cannot assign requested address") || !strings.Contains(err.Error(), "Address not found")) { framework.Failf("failed to remove ip address %s from node %s, err: %q", ip, nodeName, err) } } } - if len(targetSecondaryNode.nodeIP) > 0 { - ginkgo.By("Deleting EgressIP Setup if any") - e2ekubectl.RunKubectlOrDie("default", "delete", "eip", "egressip", "--ignore-not-found=true") + ginkgo.By("Deleting EgressIP Setup if any") + e2ekubectl.RunKubectlOrDie("default", "delete", "eip", "egressip", "--ignore-not-found=true") + if egressNode != "" { e2ekubectl.RunKubectlOrDie("default", "label", "node", egressNode, "k8s.ovn.org/egress-assignable-") - tearDownNetworkAndTargetForMultiNIC([]string{egressNode}, targetSecondaryNode) } + // network is removed by provider Context API }) ginkgo.It("should listen on each host addresses", func() { endPoints := make([]*v1.Pod, 0) endpointsSelector := map[string]string{"servicebackend": "true"} nodesHostnames := sets.NewString() - nodes, err = e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) @@ -786,54 +807,89 @@ var _ = ginkgo.Describe("Services", func() { } ginkgo.By("Creating an external container to send the traffic from") - createClusterExternalContainer(clientContainerName, agnhostImage, - []string{"--network", "kind", "-P"}, - []string{"netexec", "--http-port=80"}) - - // If `kindexgw` exists, connect client container to it - runCommand(containerRuntime, "network", "connect", "kindexgw", clientContainerName) + primaryProviderNetwork, err := 
infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + externalContainerPort := infraprovider.Get().GetExternalContainerPort() + externalContainer := infraapi.ExternalContainer{Name: clientContainerName, Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(externalContainerPort), ExtPort: externalContainerPort} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "external container %s must be created", externalContainer.Name) + + // If `xgw` exists, connect client container to it + exGwNetwork, err := infraprovider.Get().GetNetwork("xgw") + if err == nil { + _, _ = providerCtx.AttachNetwork(exGwNetwork, externalContainer.Name) + } ginkgo.By("Selecting additional IP addresses for each node") // add new secondary IP from node subnet to all nodes, if the cluster is v6 add an ipv6 address toCurlAddresses := sets.NewString() - for i, node := range nodes.Items { - + primaryIPv4Subnet, ipv6, err := primaryProviderNetwork.IPv4IPv6Subnets() + framework.ExpectNoError(err, "must get primary provider network subnets") + primaryNetworkSubnet := primaryIPv4Subnet + if IsIPv6Cluster(f.ClientSet) { + primaryNetworkSubnet = ipv6 + } + _, primaryNetworkNet, err := net.ParseCIDR(primaryNetworkSubnet) + framework.ExpectNoError(err, "failed to parse primary subnet %q", primaryNetworkSubnet) + for _, node := range nodes.Items { addrAnnotation, ok := node.Annotations["k8s.ovn.org/host-cidrs"] gomega.Expect(ok).To(gomega.BeTrue()) var addrs []string + var addrsFiltered []string err := json.Unmarshal([]byte(addrAnnotation), &addrs) framework.ExpectNoError(err, "failed to parse node[%s] host-address annotation[%s]", node.Name, addrAnnotation) - for i, addr := range addrs { + for _, addr := range addrs { addrSplit := strings.Split(addr, "/") gomega.Expect(addrSplit).Should(gomega.HaveLen(2)) - addrs[i] = addrSplit[0] + ip := net.ParseIP(addrSplit[0]) + if !primaryNetworkNet.Contains(ip) { + framework.Logf("skipping IP %s because its outside primary provider network %s", ip, primaryNetworkSubnet) + continue + } + addrsFiltered = append(addrsFiltered, ip.String()) } - toCurlAddresses.Insert(addrs...) + toCurlAddresses.Insert(addrsFiltered...) // Calculate and store for AfterEach new target IP addresses. var newIP string if nodeIPs[node.Name] == nil { nodeIPs[node.Name] = make(map[int]string) } - if utilnet.IsIPv6String(e2enode.GetAddresses(&node, v1.NodeInternalIP)[0]) { - newIP = "fc00:f853:ccd:e793:1111::" + strconv.Itoa(i) + if IsIPv6Cluster(f.ClientSet) { + newIPIP, err := ipalloc.NewPrimaryIPv6() + framework.ExpectNoError(err, "must get new primary provider IPv4") + newIP = newIPIP.String() nodeIPs[node.Name][6] = newIP } else { - newIP = "172.18.1." 
+ strconv.Itoa(i+1) - nodeIPs[node.Name][4] = newIP + newIPIP, err := ipalloc.NewPrimaryIPv4() + framework.ExpectNoError(err, "must get new primary provider IPv4") + nodeIPs[node.Name][4] = newIPIP.String() } } - + iproute2Proto := "-4" + if IsIPv6Cluster(f.ClientSet) { + iproute2Proto = "-6" + } ginkgo.By("Adding additional IP addresses to each node") for nodeName, ipFamilies := range nodeIPs { for _, ip := range ipFamilies { // manually add the a secondary IP to each node - _, err = runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "add", ip, "dev", "breth0") + framework.Logf("adding IP %q to Node %s", ip, nodeName) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", iproute2Proto, "addr", "add", ip, "dev", deploymentconfig.Get().ExternalBridgeName()}) if err != nil { framework.Failf("failed to add new IP address %s to node %s: %v", ip, nodeName, err) } + providerCtx.AddCleanUpFn(func() error { + // manually add the a secondary IP to each node + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", iproute2Proto, "addr", "del", ip, "dev", deploymentconfig.Get().ExternalBridgeName()}) + if err != nil { + return fmt.Errorf("failed to add new IP address %s to node %s: %v", ip, nodeName, err) + } + return nil + }) toCurlAddresses.Insert(ip) } } @@ -886,7 +942,7 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Hitting service " + serviceSpec.Name + " on " + address + " via " + protocol) gomega.Eventually(func() bool { - epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, address, toCurlPort, "hostname") // Expect to receive a valid hostname return nodesHostnames.Has(epHostname) @@ -935,13 +991,37 @@ var _ = ginkgo.Describe("Services", func() { ginkgo.By("Label egress node" + egressNode + " create external container to send egress traffic to via secondary MultiNIC EIP") egressNode = egressPod.Spec.NodeName e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egressNode, "k8s.ovn.org/egress-assignable", "dummy") + ginkgo.By("creating secondary network and attaching to all Nodes and external container") // configure and add additional network to worker containers for EIP multi NIC feature + subnet := secondaryIPV4Subnet + if isIPv6Cluster { + subnet = secondaryIPV6Subnet + } + secondaryProviderNetwork, err := providerCtx.CreateNetwork(secondaryNetworkName, subnet) + framework.ExpectNoError(err, "creation of network %s must be available to attach containers", secondaryNetworkName) + nodes, err = f.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + framework.ExpectNoError(err, "must list all Nodes") + for _, node := range nodes.Items { + _, err = providerCtx.AttachNetwork(secondaryProviderNetwork, node.Name) + framework.ExpectNoError(err, "network %s must attach to node %s", secondaryProviderNetwork.Name, node.Name) + } + serverExternalContainerPort := infraprovider.Get().GetExternalContainerPort() + serverExternalContainerSpec := infraapi.ExternalContainer{ + Name: targetSecondaryContainerName, + Image: images.AgnHost(), + Network: secondaryProviderNetwork, + Args: getAgnHostHTTPPortBindCMDArgs(serverExternalContainerPort), + ExtPort: serverExternalContainerPort, + } + serverExternalContainer, err := providerCtx.CreateExternalContainer(serverExternalContainerSpec) + framework.ExpectNoError(err, "creation of external container %s attached to network must succeed", + serverExternalContainerSpec.Name, 
serverExternalContainer.Network.Name()) + var serverExternalContainerIP string if isIPv6Cluster { - _, targetSecondaryNode.nodeIP = configNetworkAndGetTarget(secondaryIPV6Subnet, []string{egressNode}, isIPv6Cluster, targetSecondaryNode) + serverExternalContainerIP = serverExternalContainer.GetIPv6() } else { - targetSecondaryNode.nodeIP, _ = configNetworkAndGetTarget(secondaryIPV4Subnet, []string{egressNode}, isIPv6Cluster, targetSecondaryNode) + serverExternalContainerIP = serverExternalContainer.GetIPv4() } - ginkgo.By("Create an EgressIP object with one secondary multi NIC egress IP defined") egressIP := "10.10.10.105" // secondary subnet as defined in EIP test suite if isIPv6Cluster { @@ -962,11 +1042,13 @@ spec: if err := os.WriteFile("egressip.yaml", []byte(egressIPConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) } - defer func() { + ginkgo.DeferCleanup(func() error { + e2ekubectl.RunKubectlOrDie("default", "delete", "-f", "egressip.yaml", "--ignore-not-found=true") if err := os.Remove("egressip.yaml"); err != nil { - framework.Logf("Unable to remove the CRD config from disk: %v", err) + return fmt.Errorf("unable to remove the CRD config from disk: %v", err) } - }() + return nil + }) framework.Logf("Create the EgressIP configuration") e2ekubectl.RunKubectlOrDie("default", "create", "-f", "egressip.yaml") @@ -989,56 +1071,81 @@ spec: framework.Failf("Error: expected to have 1 egress IP assignment") } - ginkgo.By("Creating an external container to send the ingress nodeport service traffic from") - extClientv4, extClientv6 := createClusterExternalContainer(clientContainerName, agnhostImage, - []string{"--network", "kind", "-P"}, - []string{"netexec", "--http-port=80"}) - - // If `kindexgw` exists, connect client container to it - runCommand(containerRuntime, "network", "connect", "kindexgw", clientContainerName) + // FIXME: implement feature to connect networks + // FIXME: tests shouldnt depend on external resources precreated and instead manage those resources within + // the lifecycle of the test. 
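Both places that deal with the optional external gateway network follow the same pattern: look the network up by name and attach the container only when it exists. A small helper distilled from the calls visible in this hunk (the helper itself is not part of the change) could look like:

package e2e

import (
	"errors"

	"github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider"
	infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api"
)

// attachToNetworkIfPresent connects a container to an optional, pre-created
// provider network (such as the external gateway network "xgw") and quietly
// skips the attach when that network does not exist. Sketch only, based on
// the GetNetwork/AttachNetwork calls used in this diff.
func attachToNetworkIfPresent(providerCtx infraapi.Context, networkName, containerName string) error {
	network, err := infraprovider.Get().GetNetwork(networkName)
	if errors.Is(err, infraapi.NotFound) {
		// The network is optional: nothing to attach to.
		return nil
	}
	if err != nil {
		return err
	}
	_, err = providerCtx.AttachNetwork(network, containerName)
	return err
}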
+ // If `xgw` exists, connect client container to it + exGWNetwork, err := infraprovider.Get().GetNetwork("xgw") + if err == nil { + _, _ = providerCtx.AttachNetwork(exGWNetwork, serverExternalContainer.GetName()) + } ginkgo.By("Selecting additional IP addresses for each node") // add new secondary IP from node subnet to all nodes, if the cluster is v6 add an ipv6 address - toCurlAddresses := sets.NewString() + secondaryNetworkV4Subnet, secondaryNetworkV6Subnet, err := secondaryProviderNetwork.IPv4IPv6Subnets() + framework.ExpectNoError(err, "must get secondary provider network subnets") + secondarySubnetStr := secondaryNetworkV4Subnet + if isIPv6Cluster { + secondarySubnetStr = secondaryNetworkV6Subnet + } + _, secondarySubnet, err := net.ParseCIDR(secondarySubnetStr) + framework.ExpectNoError(err, "must parse secondary subnet %q", secondarySubnetStr) + toCurlAddressesSecondary := sets.NewString() for i, node := range nodes.Items { - addrAnnotation, ok := node.Annotations["k8s.ovn.org/host-cidrs"] gomega.Expect(ok).To(gomega.BeTrue()) - - var addrs []string - err := json.Unmarshal([]byte(addrAnnotation), &addrs) + var allAddrs []string + var filteredAddrs []string + err := json.Unmarshal([]byte(addrAnnotation), &allAddrs) framework.ExpectNoError(err, "failed to parse node[%s] host-address annotation[%s]", node.Name, addrAnnotation) - for i, addr := range addrs { + for _, addr := range allAddrs { addrSplit := strings.Split(addr, "/") gomega.Expect(addrSplit).Should(gomega.HaveLen(2)) - addrs[i] = addrSplit[0] + // ensure only IPs for the secondary subnet are added + ip := net.ParseIP(addrSplit[0]) + gomega.Expect(ip).NotTo(gomega.BeNil()) + if !secondarySubnet.Contains(ip) { + framework.Logf("skipping IP %s because it is not within networks subnet %s", ip, subnet) + continue // skip if not within the networks subnet + } + filteredAddrs = append(filteredAddrs, ip.String()) } - toCurlAddresses.Insert(addrs...) + toCurlAddressesSecondary.Insert(filteredAddrs...) // Calculate and store for AfterEach new target IP addresses. - var newIP string if nodeIPs[node.Name] == nil { nodeIPs[node.Name] = make(map[int]string) } - if utilnet.IsIPv6String(e2enode.GetAddresses(&node, v1.NodeInternalIP)[0]) { - newIP = "fc00:f853:ccd:e793:1111::" + strconv.Itoa(i) - nodeIPs[node.Name][6] = newIP + + if isIPv6Cluster { + newIPIP := fmt.Sprintf("2001:db8:abcd:1234::%d", i+140) + nodeIPs[node.Name][6] = newIPIP } else { - newIP = "172.18.1." 
+ strconv.Itoa(i+1) - nodeIPs[node.Name][4] = newIP + newIPIP := fmt.Sprintf("10.10.10.%d", i+140) + nodeIPs[node.Name][4] = newIPIP } } ginkgo.By("Adding additional IP addresses to each node") for nodeName, ipFamilies := range nodeIPs { + secondaryNetworkInterface, err := infraprovider.Get().GetK8NodeNetworkInterface(nodeName, secondaryProviderNetwork) + framework.ExpectNoError(err, "must get Node %s network interface %s", nodeName, secondaryProviderNetwork) + gomega.Expect(secondaryNetworkInterface.InfName).NotTo(gomega.BeEmpty(), "failed to fetch interface name from a k8 node attached to a secondary network") for _, ip := range ipFamilies { // manually add the a secondary IP to each node - _, err = runCommand(containerRuntime, "exec", nodeName, "ip", "addr", "add", ip, "dev", "breth0") + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "addr", "add", ip, "dev", secondaryNetworkInterface.InfName}) if err != nil { framework.Failf("failed to add new IP address %s to node %s: %v", ip, nodeName, err) } - toCurlAddresses.Insert(ip) + providerCtx.AddCleanUpFn(func() error { + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "addr", "del", ip, "dev", secondaryNetworkInterface.InfName}) + if err != nil { + return fmt.Errorf("failed to del newly assigned node IP address %s to node %s: %v", ip, nodeName, err) + } + return nil + }) + toCurlAddressesSecondary.Insert(ip) } } @@ -1073,14 +1180,14 @@ spec: etpClusterServiceName, f.Namespace.Name) ginkgo.By("Checking connectivity to the external container from egressIP pod " + egressPod.Name + " and verify that the source IP is the secondary NIC egress IP") - framework.Logf("Destination IPs for external container are ip=%v", targetSecondaryNode.nodeIP) - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetSecondaryNode, egressPod.Name, - egressPod.Namespace, true, []string{egressIP})) + framework.Logf("Destination IPs for external container are ip=%v", serverExternalContainerIP) + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(serverExternalContainer, + egressPod.Namespace, egressPod.Name, true, []string{egressIP})) framework.ExpectNoError(err, "Check connectivity from pod (%s/%s) to an external container attached to "+ "a network that is a secondary host network and verify that the src IP is the expected egressIP %s, failed: %v", egressPod.Namespace, egressPod.Name, egressIP, err) - externalSvcClientIPs := sets.NewString(extClientv4, extClientv6) + externalSvcClientIPs := sets.NewString(serverExternalContainerIP) for _, serviceSpec := range []*v1.Service{etpLocalSvc, etpClusterSvc} { tcpNodePort, udpNodePort := nodePortsFromService(serviceSpec) @@ -1089,21 +1196,25 @@ spec: if protocol == "udp" { toCurlPort = int32(udpNodePort) } - for _, address := range toCurlAddresses.List() { + for _, address := range toCurlAddressesSecondary.List() { if !isIPv6Cluster && utilnet.IsIPv6String(address) { continue } - ginkgo.By("Hitting service " + serviceSpec.Name + " on " + address + " via " + protocol) gomega.Eventually(func() bool { - epHostname := pokeEndpoint("", clientContainerName, protocol, address, toCurlPort, + epHostname := pokeEndpointViaExternalContainer(serverExternalContainer, protocol, address, toCurlPort, "hostname") // Expect to receive a valid hostname return nodesHostnames.Has(epHostname) }, "40s", "1s").Should(gomega.BeTrue()) } - egressNodeIP, err := getNodeIP(f.ClientSet, egressNode) - framework.ExpectNoError(err, 
fmt.Sprintf("failed to get nodes's %s node ip address", egressNode)) + secondaryNetworkInterface, err := infraprovider.Get().GetK8NodeNetworkInterface(egressNode, secondaryProviderNetwork) + framework.ExpectNoError(err, "must get secondary network interface information") + egressNodeIP := secondaryNetworkInterface.IPv4 + if isIPv6Cluster { + egressNodeIP = secondaryNetworkInterface.IPv6 + } + gomega.Expect(egressNodeIP).NotTo(gomega.BeEmpty(), "secondary external IP must be available") framework.Logf("NodeIP of node %s is %s", egressNode, egressNodeIP) ginkgo.By("Hitting service nodeport " + serviceSpec.Name + " on " + egressNodeIP + " via " + protocol) // send ingress traffic from external container to egressNode where the pod lives @@ -1124,7 +1235,7 @@ spec: // we try to hit the backend pod which is on the egressNode framework.Logf("%+v", externalSvcClientIPs) gomega.Eventually(func() bool { - epClientIP := pokeEndpoint("", clientContainerName, protocol, egressNodeIP, toCurlPort, "clientip") // Returns the request's IP address. + epClientIP := pokeEndpointViaExternalContainer(serverExternalContainer, protocol, egressNodeIP, toCurlPort, "clientip") // Returns the request's IP address. framework.Logf("Received srcIP: %v", epClientIP) IP, _, err := net.SplitHostPort(epClientIP) if err != nil { @@ -1152,8 +1263,10 @@ spec: gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 0)) nodeName := nodes.Items[0].Name - serverNodeIPv4, serverNodeIPv6 := getContainerAddressesForNetwork(nodeName, primaryNetworkName) - + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "failed to get primary network") + primaryInf, err := infraprovider.Get().GetK8NodeNetworkInterface(nodeName, primaryProviderNetwork) + framework.ExpectNoError(err, "failed to get primary interface for node %s", nodeName) ginkgo.By("Creating the backend pod") args := []string{ "netexec", @@ -1164,7 +1277,7 @@ spec: serverPodName := nodeName + "-ep" var serverContainerName string - _, err := createPod(f, serverPodName, nodeName, f.Namespace.Name, []string{}, endpointsSelector, + _, err = createPod(f, serverPodName, nodeName, f.Namespace.Name, []string{}, endpointsSelector, func(p *v1.Pod) { p.Spec.Containers[0].Args = args serverContainerName = p.Spec.Containers[0].Name @@ -1200,21 +1313,20 @@ spec: framework.ExpectNoError(err) ginkgo.By("Creating an external client") - clientIPv4, clientIPv6 := createClusterExternalContainer( - clientContainerName, - agnhostImage, - []string{"--privileged", "--network", "kind"}, - []string{"pause"}, - ) - - clientIP := clientIPv4 - serverNodeIP := serverNodeIPv4 - ipContainerCmd := "ip" + externalContainer := infraapi.ExternalContainer{Name: clientContainerName, Image: images.AgnHost(), Network: primaryProviderNetwork, + Args: []string{"pause"}, ExtPort: infraprovider.Get().GetExternalContainerPort()} + externalContainer, err = providerCtx.CreateExternalContainer(externalContainer) + framework.ExpectNoError(err, "failed to create external container", externalContainer) + + clientIP := externalContainer.GetIPv4() + serverNodeIP := primaryInf.IPv4 + ipContainerCmdPrefix := []string{"ip"} if IsIPv6Cluster(f.ClientSet) { - clientIP = clientIPv6 - serverNodeIP = serverNodeIPv6 - ipContainerCmd = "ip -6" + clientIP = externalContainer.GetIPv6() + serverNodeIP = primaryInf.IPv6 + ipContainerCmdPrefix = append(ipContainerCmdPrefix, "-6") } + gomega.Expect(clientIP).ShouldNot(gomega.BeEmpty()) const pmtu = 
"1300" payloads := map[string]string{ @@ -1231,19 +1343,14 @@ spec: // test setup required to achieve fragmentation without emulating // PMTUD is more complex so we stick to UDP. ginkgo.By("Lowering PMTU towards the server") - ipContainerCmd += " route add " + serverNodeIP + " dev eth0 src " + clientIP + " mtu " + pmtu - cmd := []string{ - containerRuntime, - "exec", - clientContainerName, - "/bin/sh", - "-c", - ipContainerCmd, - } - framework.Logf("Running %v", cmd) - _, err = runCommand(cmd...) + ipContainerCmd := append(ipContainerCmdPrefix, "route", "add", serverNodeIP, "dev", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "src", clientIP, "mtu", pmtu) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, ipContainerCmd) framework.ExpectNoError(err, "lowering MTU in the external kind container failed: %v", err) - + providerCtx.AddCleanUpFn(func() error { + ipContainerCmd := append(ipContainerCmdPrefix, "route", "del", serverNodeIP, "dev", infraprovider.Get().ExternalContainerPrimaryInterfaceName(), "src", clientIP, "mtu", pmtu) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, ipContainerCmd) + return err + }) var udpPort int32 for _, port := range service.Spec.Ports { if port.Protocol == v1.ProtocolUDP { @@ -1269,16 +1376,8 @@ spec: if sourcePort != "" { containerCmd = fmt.Sprintf("echo 'echo %s' | nc -w2 -u -p %s %s %d", payload, sourcePort, serverNodeIP, udpPort) } - cmd = []string{ - containerRuntime, - "exec", - clientContainerName, - "/bin/sh", - "-c", - containerCmd, - } - framework.Logf("Running %v", cmd) - stdout, err := runCommand(cmd...) + framework.Logf("Running %v", containerCmd) + stdout, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"/bin/sh", "-c", containerCmd}) framework.ExpectNoError(err, "sending echo request failed: %v", err) ginkgo.By("Checking that the service received the request and replied") @@ -1327,12 +1426,12 @@ func getServiceBackendsFromPod(execPod *v1.Pod, serviceIP string, servicePort in // or "fd69::5" var _ = ginkgo.Describe("Service Hairpin SNAT", func() { const ( - svcName = "service-hairpin-test" - backendName = "hairpin-backend-pod" - endpointHTTPPort = "80" - serviceHTTPPort = 6666 - V4LBHairpinMasqueradeIP = "169.254.0.5" - V6LBHairpinMasqueradeIP = "fd69::5" + svcName = "service-hairpin-test" + backendName = "hairpin-backend-pod" + endpointHTTPPort uint16 = 80 + serviceHTTPPort uint16 = 6666 + V4LBHairpinMasqueradeIP = "169.254.0.5" + V6LBHairpinMasqueradeIP = "fd69::5" ) var ( @@ -1361,7 +1460,7 @@ var _ = ginkgo.Describe("Service Hairpin SNAT", func() { ginkgo.It("Should ensure service hairpin traffic is SNATed to hairpin masquerade IP; Switch LB", func() { ginkgo.By("creating an ovn-network backend pod") - _, err := createGenericPodWithLabel(f, backendName, backendNodeName, namespaceName, []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", endpointHTTPPort)}, hairpinPodSel) + _, err := createGenericPodWithLabel(f, backendName, backendNodeName, namespaceName, getAgnHostHTTPPortBindFullCMD(endpointHTTPPort), hairpinPodSel) framework.ExpectNoError(err, fmt.Sprintf("unable to create backend pod: %s, err: %v", backendName, err)) ginkgo.By("creating a TCP service service-for-pods with type=ClusterIP in namespace " + namespaceName) @@ -1379,7 +1478,7 @@ var _ = ginkgo.Describe("Service Hairpin SNAT", func() { isIpv6 = true } - clientIP := pokeEndpoint(namespaceName, backendName, "http", svcIP, serviceHTTPPort, "clientip") + clientIP 
:= pokeEndpointViaPod(f, namespaceName, backendName, svcIP, serviceHTTPPort, "clientip") clientIP, _, err = net.SplitHostPort(clientIP) framework.ExpectNoError(err, "failed to parse client ip:port") @@ -1394,15 +1493,16 @@ var _ = ginkgo.Describe("Service Hairpin SNAT", func() { ginkgo.By("creating an host-network backend pod on " + backendNodeName) // create hostNeworkedPods + hostNetPort := infraprovider.Get().GetK8HostPort() _, err := createPod(f, backendName, backendNodeName, namespaceName, []string{}, hairpinPodSel, func(p *v1.Pod) { - p.Spec.Containers[0].Command = []string{"/agnhost", "netexec", fmt.Sprintf("--http-port=%s", endpointHTTPPort)} + p.Spec.Containers[0].Command = getAgnHostHTTPPortBindFullCMD(hostNetPort) p.Spec.HostNetwork = true }) framework.ExpectNoError(err, fmt.Sprintf("unable to create backend pod: %s, err: %v", backendName, err)) ginkgo.By("creating a TCP service service-for-pods with type=NodePort in namespace " + namespaceName) - svcIP, err = createServiceForPodsWithLabel(f, namespaceName, serviceHTTPPort, endpointHTTPPort, "NodePort", hairpinPodSel) + svcIP, err = createServiceForPodsWithLabel(f, namespaceName, serviceHTTPPort, hostNetPort, "NodePort", hairpinPodSel) framework.ExpectNoError(err, fmt.Sprintf("unable to create service: service-for-pods, err: %v", err)) err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, "service-for-pods", 1, time.Second, wait.ForeverTestTimeout) @@ -1413,7 +1513,7 @@ var _ = ginkgo.Describe("Service Hairpin SNAT", func() { ginkgo.By("by sending a TCP packet to service service-for-pods with type=NodePort(" + nodeIP + ":" + fmt.Sprint(svc.Spec.Ports[0].NodePort) + ") in namespace " + namespaceName + " from node " + backendNodeName) - clientIP := pokeEndpoint("", backendNodeName, "http", nodeIP, svc.Spec.Ports[0].NodePort, "clientip") + clientIP := pokeEndpointViaNode(backendNodeName, "http", nodeIP, hostNetPort, uint16(svc.Spec.Ports[0].NodePort), "clientip") clientIP, _, err = net.SplitHostPort(clientIP) framework.ExpectNoError(err, "failed to parse client ip:port") @@ -1425,15 +1525,15 @@ var _ = ginkgo.Describe("Service Hairpin SNAT", func() { var _ = ginkgo.Describe("Load Balancer Service Tests with MetalLB", func() { const ( - svcName = "lbservice-test" - backendName = "lb-backend-pod" - endpointHTTPPort = 80 - endpointUDPPort = 10001 - loadBalancerYaml = "loadbalancer.yaml" - bgpAddYaml = "bgpAdd.yaml" - bgpEmptyYaml = "bgpEmptyAdd.yaml" - clientContainer = "lbclient" - routerContainer = "frr" + svcName = "lbservice-test" + backendName = "lb-backend-pod" + endpointHTTPPort = 80 + endpointUDPPort = 10001 + loadBalancerYaml = "loadbalancer.yaml" + bgpAddYaml = "bgpAdd.yaml" + bgpEmptyYaml = "bgpEmptyAdd.yaml" + externalClientContainerName = "lbclient" + externalRouterContainerName = "frr" ) var ( @@ -1547,16 +1647,16 @@ spec: ginkgo.AfterEach(func() { framework.Logf("Delete the Load Balancer configuration") - e2ekubectl.RunKubectlOrDie("default", "delete", "-f", loadBalancerYaml) + e2ekubectl.RunKubectlOrDie("default", "delete", "-f", loadBalancerYaml, "--ignore-not-found=true") defer func() { if err := os.Remove(loadBalancerYaml); err != nil { framework.Logf("Unable to remove the CRD config from disk: %v", err) } framework.Logf("Reset MTU on intermediary router to allow large packets") - cmd := []string{containerRuntime, "exec", routerContainer} - mtuCommand := strings.Split("ip link set mtu 1500 dev eth1", " ") - cmd = append(cmd, mtuCommand...) - _, err := runCommand(cmd...) 
+ primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "must get primary provider network") + externalContainer := infraapi.ExternalContainer{Name: externalRouterContainerName, Network: primaryProviderNetwork} + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{"ip", "link", "set", "mtu", "1500", "dev", "eth1"}) framework.ExpectNoError(err, "failed to reset MTU on intermediary router") framework.Logf("Delete the custom BGP Advertisement configuration") e2ekubectl.RunKubectlOrDie("metallb-system", "delete", "bgpadvertisement", "example", "--ignore-not-found=true") @@ -1601,15 +1701,15 @@ metadata: if !utilnet.IsIPv6String(svcLoadBalancerIP) { ginkgo.By("Setting up external IPv4 client with an intermediate node") defer func() { - cleanupIPv4NetworkForExternalClient(svcLoadBalancerIP) + cleanupIPv4NetworkForExternalClient(svcLoadBalancerIP, endpointHTTPPort) }() - setupIPv4NetworkForExternalClient(svcLoadBalancerIP, nodeIP) + setupIPv4NetworkForExternalClient(svcLoadBalancerIP, endpointHTTPPort, nodeIP) } else { ginkgo.By("Setting up external IPv6 client with an intermediate node") defer func() { - cleanupIPv6NetworkForExternalClient(svcLoadBalancerIP) + cleanupIPv6NetworkForExternalClient(svcLoadBalancerIP, endpointHTTPPort) }() - setupIPv6NetworkForExternalClient(svcLoadBalancerIP, nodeIP) + setupIPv6NetworkForExternalClient(svcLoadBalancerIP, endpointHTTPPort, nodeIP) svcIPforCurl = fmt.Sprintf("[%s]", svcLoadBalancerIP) } @@ -1664,13 +1764,15 @@ metadata: svcLoadBalancerIP, err := getServiceLoadBalancerIP(f.ClientSet, namespaceName, svcName) framework.ExpectNoError(err, fmt.Sprintf("failed to get service lb ip: %s, err: %v", svcName, err)) - numberOfETPRules := pokeIPTableRules(backendNodeName, "OVN-KUBE-EXTERNALIP") + numberOfETPRules := pokeNodeIPTableRules(backendNodeName, "OVN-KUBE-EXTERNALIP") gomega.Expect(numberOfETPRules).To(gomega.Equal(5)) // curl the LB service from the client container to trigger BGP route advertisement ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) - - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "must fetch primary provider network") + externalContainer := infraapi.ExternalContainer{Name: "lbclient", Network: primaryProviderNetwork} // pre-created + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") ginkgo.By("all 3 nodeIP routes are advertised correctly by metalb BGP routes") @@ -1679,7 +1781,8 @@ metadata: // nexthop via 172.19.0.3 dev eth0 weight 1 // nexthop via 172.19.0.4 dev eth0 weight 1 // nexthop via 172.19.0.2 dev eth0 weight 1 - cmd := []string{containerRuntime, "exec", routerContainer} + + cmd := []string{} ipVer := "" if utilnet.IsIPv6String(svcLoadBalancerIP) { ipVer = " -6" @@ -1691,14 +1794,15 @@ metadata: framework.ExpectNoError(err, fmt.Sprintf("failed to get node's %s node ip address", backendNodeName)) nonBackendNodeIP, err := getNodeIP(f.ClientSet, backendNodeName) framework.ExpectNoError(err, fmt.Sprintf("failed to get node's %s node ip address", backendNodeName)) + frrExternalContainer := infraapi.ExternalContainer{Name: "frr", Network: primaryProviderNetwork} 
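The checks that follow poll `ip route show <LB IP>` on the FRR router and count the advertised next hops, as in the sample output quoted in the comment above. A standalone sketch of that counting logic (illustrative only; the tests inline their own version) is:

package e2e

import (
	"strings"
)

// countECMPNexthops counts the "nexthop via ..." entries in the output of
// "ip route show <load-balancer-IP>" run on the FRR router, e.g.:
//   nexthop via 172.19.0.3 dev eth0 weight 1
//   nexthop via 172.19.0.4 dev eth0 weight 1
//   nexthop via 172.19.0.2 dev eth0 weight 1
// which is how the tests verify how many node next hops MetalLB advertised.
func countECMPNexthops(routeOutput string) int {
	count := 0
	for _, line := range strings.Split(routeOutput, "\n") {
		if strings.Contains(line, "nexthop via") {
			count++
		}
	}
	return count
}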
gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecExternalContainerCommand(frrExternalContainer, cmd) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) return strings.Contains(routes, backendNodeIP) }, 30*time.Second).Should(gomega.BeTrue()) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecExternalContainerCommand(frrExternalContainer, cmd) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) return strings.Contains(routes, nonBackendNodeIP) @@ -1737,11 +1841,11 @@ spec: nodeIP, err := getNodeIP(f.ClientSet, node) framework.ExpectNoError(err, fmt.Sprintf("failed to get nodes's %s node ip address", node)) framework.Logf("NodeIP of node %s is %s", node, nodeIP) - cmd := []string{containerRuntime, "exec", routerContainer} - + externalContainer := infraapi.ExternalContainer{Name: externalRouterContainerName, Network: primaryProviderNetwork} + cmd := []string{} cmd = append(cmd, bgpRouteCommand...) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, cmd) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) routeCount := 0 @@ -1761,29 +1865,28 @@ spec: ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName + " via node " + node) - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") ginkgo.By("change MTU on intermediary router to force icmp related packets") - cmd = []string{containerRuntime, "exec", routerContainer} + cmd = []string{} mtuCommand := strings.Split("ip link set mtu 1280 dev eth1", " ") - cmd = append(cmd, mtuCommand...) - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, cmd) framework.ExpectNoError(err, "failed to change MTU on intermediary router") time.Sleep(time.Second * 5) // buffer to ensure MTU change took effect ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName + " via node " + node) - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") ginkgo.By("reset MTU on intermediary router to allow large packets") - cmd = []string{containerRuntime, "exec", routerContainer} + cmd = []string{} mtuCommand = strings.Split("ip link set mtu 1500 dev eth1", " ") cmd = append(cmd, mtuCommand...) - _, err = runCommand(cmd...) 
+ _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, cmd) framework.ExpectNoError(err, "failed to reset MTU on intermediary router") } }) @@ -1800,7 +1903,7 @@ spec: checkNumberOfETPRules := func(value int, pattern string) wait.ConditionFunc { return func() (bool, error) { - numberOfETPRules := pokeIPTableRules(backendNodeName, pattern) + numberOfETPRules := pokeNodeIPTableRules(backendNodeName, pattern) return (numberOfETPRules == value), nil } } @@ -1825,8 +1928,8 @@ spec: framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) - - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + externalContainer := infraapi.ExternalContainer{Name: externalClientContainerName} + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") ginkgo.By("patching service " + svcName + " to allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") @@ -1854,7 +1957,7 @@ spec: ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") pktSize := 60 @@ -1884,7 +1987,7 @@ spec: ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) - _, err = curlInContainer(clientContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso -o big.iso", 120) + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso", 120) framework.ExpectNoError(err, "failed to curl load balancer service") err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) @@ -1916,8 +2019,10 @@ spec: nodeIP, err := getNodeIP(f.ClientSet, backendNodeName) framework.ExpectNoError(err, fmt.Sprintf("failed to get nodes's %s node ip address", backendNodeName)) framework.Logf("NodeIP of node %s is %s", backendNodeName, nodeIP) - cmd := []string{containerRuntime, "exec", routerContainer} - + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "must get primary provider network") + frrExternalContainer := infraapi.ExternalContainer{Name: externalRouterContainerName, Network: primaryProviderNetwork} + cmd := []string{} ipVer := "" if utilnet.IsIPv6String(svcLoadBalancerIP) { ipVer = " -6" @@ -1926,7 +2031,7 @@ spec: cmd = append(cmd, bgpRouteCommand...) gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) 
+ routes, err := infraprovider.Get().ExecExternalContainerCommand(frrExternalContainer, cmd) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) routeCount := 0 @@ -1949,9 +2054,15 @@ spec: svcLoadBalancerIP, endpointUDPPort, ) - cmd = []string{containerRuntime, "exec", clientContainer, "bash", "-x", "-c", netcatCmd} + cmd = []string{"bash", "-x", "-c", netcatCmd} framework.Logf("netcat command %s", cmd) - output, err = runCommand(cmd...) + clientNetNetwork, err := infraprovider.Get().GetNetwork("clientnet") + if errors.Is(err, infraapi.NotFound) { + ginkgo.Skip("clientnet network is not available. Skipping") + } + framework.ExpectNoError(err, "clientnet network must be available") + lbClientExternalContainer := infraapi.ExternalContainer{Name: "lbclient", Network: clientNetNetwork} + output, err = infraprovider.Get().ExecExternalContainerCommand(lbClientExternalContainer, cmd) framework.ExpectNoError(err, "failed to connect to load balancer service") framework.Logf("netcat command output %s", output) @@ -1959,15 +2070,16 @@ spec: // Check that sourceIP of the LBService is preserved targetPodLogs, err := e2ekubectl.RunKubectl("default", "logs", "-l", "app=nginx", "--container", "udp-server") framework.ExpectNoError(err, "failed to inspect logs in backend pods") - framework.Logf("%v", targetPodLogs) - lbClientIPv4, lbClientIPv6 := getContainerAddressesForNetwork(clientContainer, "clientnet") - framework.Logf("%v", lbClientIPv4) - if strings.Contains(targetPodLogs, lbClientIPv4) { - framework.Logf("found the expected srcIP %s!", lbClientIPv4) - } else if strings.Contains(targetPodLogs, lbClientIPv6) { - framework.Logf("found the expected srcIP %s!", lbClientIPv6) + framework.Logf("Target pod logs (nginx): %q", targetPodLogs) + lbClientInf, err := infraprovider.Get().GetExternalContainerNetworkInterface(lbClientExternalContainer, clientNetNetwork) + framework.ExpectNoError(err, "failed to get network interface info for network %s within lbclient external container %s", clientNetNetwork.Name(), frrExternalContainer.GetName()) + framework.Logf("found external container IPv4: %q, IPv6: %q", lbClientInf.IPv4, lbClientInf.IPv6) + if strings.Contains(targetPodLogs, lbClientInf.IPv4) { + framework.Logf("found the expected srcIP %s!", lbClientInf.IPv4) + } else if strings.Contains(targetPodLogs, lbClientInf.IPv6) { + framework.Logf("found the expected srcIP %s!", lbClientInf.IPv6) } else { - framework.Failf("could not get expected srcIP!") + framework.Failf("could not get expected srcIP!, target pod logs:\n%q", targetPodLogs) } ginkgo.By("patching service " + svcName + " to sessionAffinity=ClientIP at default timeout of 10800") @@ -1982,13 +2094,13 @@ spec: ginkgo.By("by sending a UDP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) // OVN drops the 1st packet so this one does nothing basically. // See https://issues.redhat.com/browse/FDP-223 for details - output, err = runCommand(cmd...) 
+ output, err = infraprovider.Get().ExecExternalContainerCommand(lbClientExternalContainer, cmd) framework.ExpectNoError(err, "failed to connect to load balancer service") framework.Logf("netcat command output %s", output) time.Sleep(time.Second * 10) // buffer to ensure all learn flows are created correctly after the previous drop // OVN drops the 1st packet so let's be sure to another set of netcat connections at least to check the srcIP - output, err = runCommand(cmd...) + output, err = infraprovider.Get().ExecExternalContainerCommand(lbClientExternalContainer, cmd) framework.ExpectNoError(err, "failed to connect to load balancer service") framework.Logf("netcat command output %s", output) @@ -1997,12 +2109,12 @@ spec: targetPodLogs, err = e2ekubectl.RunKubectl("default", "logs", "-l", "app=nginx", "--container", "udp-server") framework.ExpectNoError(err, "failed to inspect logs in backend pods") framework.Logf("%v", targetPodLogs) - if strings.Count(targetPodLogs, lbClientIPv4) >= 2 { - framework.Logf("found the expected srcIP %s!", lbClientIPv4) - } else if strings.Count(targetPodLogs, lbClientIPv6) >= 2 { - framework.Logf("found the expected srcIP %s!", lbClientIPv6) + if strings.Count(targetPodLogs, lbClientInf.IPv4) >= 2 { + framework.Logf("found the expected srcIP %s!", lbClientInf.IPv4) + } else if strings.Count(targetPodLogs, lbClientInf.IPv6) >= 2 { + framework.Logf("found the expected srcIP %s!", lbClientInf.IPv6) } else { - framework.Failf("could not get expected srcIP!") + framework.Failf("could not get expected srcIP!, target pod logs:\n%q", targetPodLogs) } }) ginkgo.It("Should ensure load balancer service works when ETP=local and backend pods are also egressIP served pods", func() { @@ -2044,17 +2156,16 @@ spec: nodeIP, err := getNodeIP(f.ClientSet, backendNodeName) framework.ExpectNoError(err, fmt.Sprintf("failed to get nodes's %s node ip address", backendNodeName)) framework.Logf("NodeIP of node %s is %s", backendNodeName, nodeIP) - cmd := []string{containerRuntime, "exec", routerContainer} - ipVer := "" if utilnet.IsIPv6String(svcLoadBalancerIP) { ipVer = " -6" } bgpRouteCommand := strings.Split(fmt.Sprintf("ip%s route show %s", ipVer, svcLoadBalancerIP), " ") - cmd = append(cmd, bgpRouteCommand...) - + primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() + framework.ExpectNoError(err, "must fetch primary provider network") + frrExternalContainer := infraapi.ExternalContainer{Name: "frr", Network: primaryProviderNetwork} gomega.Eventually(func() bool { - routes, err := runCommand(cmd...) + routes, err := infraprovider.Get().ExecExternalContainerCommand(frrExternalContainer, bgpRouteCommand) framework.ExpectNoError(err, "failed to get BGP routes from intermediary router") framework.Logf("Routes in FRR %s", routes) routeCount := 0 @@ -2077,9 +2188,15 @@ spec: svcLoadBalancerIP, endpointUDPPort, ) - cmd = []string{containerRuntime, "exec", clientContainer, "bash", "-x", "-c", netcatCmd} + cmd := []string{"bash", "-x", "-c", netcatCmd} framework.Logf("netcat command %s", cmd) - output, err = runCommand(cmd...) + clientNetNetwork, err := infraprovider.Get().GetNetwork("clientnet") + if errors.Is(err, infraapi.NotFound) { + ginkgo.Skip("clientnet network is not available. 
Skipping") + } + framework.ExpectNoError(err, "clientnet network must be available") + lbClientExternalContainer := infraapi.ExternalContainer{Name: "lbclient", Network: clientNetNetwork} + output, err = infraprovider.Get().ExecExternalContainerCommand(lbClientExternalContainer, cmd) framework.ExpectNoError(err, "failed to connect to load balancer service") framework.Logf("netcat command output %s", output) @@ -2087,15 +2204,17 @@ spec: // Check that sourceIP of the LBService is preserved targetPodLogs, err := e2ekubectl.RunKubectl("default", "logs", "-l", "app=nginx", "--container", "udp-server") framework.ExpectNoError(err, "failed to inspect logs in backend pods") - framework.Logf("%v", targetPodLogs) - lbClientIPv4, lbClientIPv6 := getContainerAddressesForNetwork(clientContainer, "clientnet") - framework.Logf("%v", lbClientIPv4) + framework.Logf("Target pod logs:\n%q", targetPodLogs) + lbClientNetworkInterface, err := infraprovider.Get().GetExternalContainerNetworkInterface(lbClientExternalContainer, clientNetNetwork) + framework.ExpectNoError(err, "failed to get network interface info for external container %s connected to network %s", frrExternalContainer.Name, clientNetNetwork.Name()) + lbClientIPv4, lbClientIPv6 := lbClientNetworkInterface.IPv4, lbClientNetworkInterface.IPv6 + framework.Logf("found external container lbclient IPs: IPv4 %q, IPv6: %q", lbClientIPv4, lbClientIPv6) if strings.Contains(targetPodLogs, lbClientIPv4) { framework.Logf("found the expected srcIP %s!", lbClientIPv4) } else if strings.Contains(targetPodLogs, lbClientIPv6) { framework.Logf("found the expected srcIP %s!", lbClientIPv6) } else { - framework.Failf("could not get expected srcIP!") + framework.Failf("could not get expected srcIP!, target pod logs:\n%q", targetPodLogs) } ginkgo.By("label " + nonBackendNodeName + " as egressIP assignable") @@ -2104,15 +2223,13 @@ spec: ginkgo.By("Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, // the kind subnet is /16 or /64 so the following should be fine. - dupIP := func(ip net.IP) net.IP { - dup := make(net.IP, len(ip)) - copy(dup, ip) - return dup + var egressIP1 net.IP + if utilnet.IsIPv6String(svcLoadBalancerIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() } - sampleNodeIP := net.ParseIP(nodeIP) - egressIP1 := dupIP(sampleNodeIP) - egressIP1[len(egressIP1)-2]++ - + framework.ExpectNoError(err, "must allocate new Node IP for EgressIP IP") var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP metadata: @@ -2130,11 +2247,13 @@ spec: if err := os.WriteFile("egressip.yaml", []byte(egressIPConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) } - defer func() { + ginkgo.DeferCleanup(func() error { + e2ekubectl.RunKubectlOrDie("default", "delete", "-f", "egressip.yaml", "--ignore-not-found=true") if err := os.Remove("egressip.yaml"); err != nil { - framework.Logf("Unable to remove the CRD config from disk: %v", err) + return fmt.Errorf("unable to remove the CRD config from disk: %v", err) } - }() + return nil + }) framework.Logf("Create the EgressIP configuration") e2ekubectl.RunKubectlOrDie("default", "create", "-f", "egressip.yaml") @@ -2158,7 +2277,7 @@ spec: } ginkgo.By("by sending a UDP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) - output, err = runCommand(cmd...) 
+ output, err = infraprovider.Get().ExecExternalContainerCommand(lbClientExternalContainer, cmd) framework.ExpectNoError(err, "failed to connect to load balancer service") framework.Logf("netcat command output %s", output) @@ -2172,7 +2291,7 @@ spec: } else if strings.Count(targetPodLogs, lbClientIPv6) >= 2 { framework.Logf("found the expected srcIP %s!", lbClientIPv6) } else { - framework.Failf("could not get expected srcIP!") + framework.Failf("could not get expected srcIP!, target pod logs:\n%q", targetPodLogs) } }) }) @@ -2196,7 +2315,10 @@ func getNodeIP(c clientset.Interface, nodeName string) (string, error) { func buildAndRunCommand(command string) error { cmd := strings.Split(command, " ") - _, err := runCommand(cmd...) + output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output) + } return err } @@ -2211,7 +2333,7 @@ func getServiceLoadBalancerIP(c clientset.Interface, namespace, serviceName stri return svc.Status.LoadBalancer.Ingress[0].IP, nil } -func setupIPv4NetworkForExternalClient(svcLoadBalancerIP, nodeIP string) { +func setupIPv4NetworkForExternalClient(svcLoadBalancerIP string, svcLoadBalancerPort int, nodeIP string) { // The external client configuration done in install_metallb can not be used because routes for external client // installed in K8s node https://github.com/ovn-org/ovn-kubernetes/blob/master/contrib/kind.sh#L1045-L1047 // are ignored in shared gateway mode and traffic coming back from pod is put on the docker bridge directly by @@ -2234,6 +2356,7 @@ func setupIPv4NetworkForExternalClient(svcLoadBalancerIP, nodeIP string) { // | 172.18.0.1 | // | ip route add 192.168.223.0/24 via 192.168.222.2 // | ip route add via| + // | iptables -t filter -I FORWARD -d -p tcp -m tcp --dport -j ACCEPT // | | // | vm 192.168.222.1 | // +----------------------------------------+-------------------------------------+ @@ -2264,17 +2387,21 @@ func setupIPv4NetworkForExternalClient(svcLoadBalancerIP, nodeIP string) { err = buildAndRunCommand("sudo ip route add 192.168.223.0/24 via 192.168.222.2") framework.ExpectNoError(err, "failed to add route for client to handle reverse service traffic") + err = buildAndRunCommand(fmt.Sprintf("sudo iptables -t filter -I FORWARD -d %s -p tcp -m tcp --dport %d -j ACCEPT", svcLoadBalancerIP, svcLoadBalancerPort)) + framework.ExpectNoError(err, "failed to add iptables rule for service") + err = buildAndRunCommand(fmt.Sprintf("sudo ip route add %s via %s", svcLoadBalancerIP, nodeIP)) framework.ExpectNoError(err, "failed to add route for external load balancer service") } -func cleanupIPv4NetworkForExternalClient(svcLoadBalancerIP string) { +func cleanupIPv4NetworkForExternalClient(svcLoadBalancerIP string, svcLoadBalancerPort int) { cleanupNetNamespace() buildAndRunCommand("sudo ip route delete 192.168.223.0/24 via 192.168.222.2") buildAndRunCommand(fmt.Sprintf("sudo ip route delete %s", svcLoadBalancerIP)) + buildAndRunCommand(fmt.Sprintf("sudo iptables -t filter -D FORWARD -d %s -p tcp -m tcp --dport %d -j ACCEPT", svcLoadBalancerIP, svcLoadBalancerPort)) } -func setupIPv6NetworkForExternalClient(svcLoadBalancerIP, nodeIP string) { +func setupIPv6NetworkForExternalClient(svcLoadBalancerIP string, svcLoadBalancerPort int, nodeIP string) { // The external client configuration done in install_metallb can not be used because routes for external client // installed in K8s node 
https://github.com/ovn-org/ovn-kubernetes/blob/master/contrib/kind.sh#L1045-L1047 // are ignored in shared gateway mode and traffic coming back from pod is put on the docker bridge directly by @@ -2325,12 +2452,16 @@ func setupIPv6NetworkForExternalClient(svcLoadBalancerIP, nodeIP string) { err = buildAndRunCommand(fmt.Sprintf("sudo ip -6 route add %s via %s", svcLoadBalancerIP, nodeIP)) framework.ExpectNoError(err, "failed to add route for external load balancer service") + + err = buildAndRunCommand(fmt.Sprintf("sudo ip6tables -t filter -I FORWARD -d %s -p tcp -m tcp --dport %d -j ACCEPT", svcLoadBalancerIP, svcLoadBalancerPort)) + framework.ExpectNoError(err, "failed to add iptables rule for service") } -func cleanupIPv6NetworkForExternalClient(svcLoadBalancerIP string) { +func cleanupIPv6NetworkForExternalClient(svcLoadBalancerIP string, svcLoadBalancerPort int) { cleanupNetNamespace() buildAndRunCommand("sudo ip -6 route delete fc00:f853:ccd:e223::2") buildAndRunCommand(fmt.Sprintf("sudo ip -6 route delete %s", svcLoadBalancerIP)) + buildAndRunCommand(fmt.Sprintf("sudo ip6tables -t filter -D FORWARD -d %s -p tcp -m tcp --dport %d -j ACCEPT", svcLoadBalancerIP, svcLoadBalancerPort)) } func setupNetNamespaceAndLinks() { diff --git a/test/e2e/static_pods.go b/test/e2e/static_pods.go index 19cf496d5e..c974e5b03d 100644 --- a/test/e2e/static_pods.go +++ b/test/e2e/static_pods.go @@ -4,11 +4,15 @@ import ( "context" "fmt" "os" + "os/exec" + "strings" "time" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" @@ -30,7 +34,15 @@ func waitForPodRunningInNamespaceTimeout(c clientset.Interface, podName, namespa }) } -func createStaticPod(f *framework.Framework, nodeName string, podYaml string) { +func createStaticPod(nodeName string, podYaml string) { + // FIXME; remove need to use a container runtime because its not portable + runCommand := func(cmd ...string) (string, error) { + output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output) + } + return string(output), nil + } //create file var podFile = "static-pod.yaml" if err := os.WriteFile(podFile, []byte(podYaml), 0644); err != nil { @@ -48,39 +60,36 @@ func createStaticPod(f *framework.Framework, nodeName string, podYaml string) { if err != nil { framework.Failf("failed to copy pod file to node %s", nodeName) } - } func removeStaticPodFile(nodeName string, podFile string) { - cmd := []string{"docker", "exec", nodeName, "bash", "-c", "rm /etc/kubernetes/manifests/static-pod.yaml"} + // FIXME; remove need to use a container runtime because its not portable + runCommand := func(cmd ...string) (string, error) { + output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output) + } + return string(output), nil + } + + cmd := []string{"docker", "exec", nodeName, "bash", "-c", fmt.Sprintf("rm /etc/kubernetes/manifests/%s", podFile)} framework.Logf("Running command %v", cmd) _, err := runCommand(cmd...) 
if err != nil { framework.Failf("failed to remove pod file from node %s", nodeName) } - } // This test does the following // Applies a static-pod.yaml file to a nodes /etc/kubernetes/manifest dir // Expects the static pod to succeed var _ = ginkgo.Describe("Creating a static pod on a node", func() { - - const ( - podFile string = "static-pod.yaml" - agnhostImage string = "registry.k8s.io/e2e-test-images/agnhost:2.26" - ) + const podFile string = "static-pod.yaml" f := wrappedTestFramework("staticpods") - var cs clientset.Interface - - ginkgo.BeforeEach(func() { - cs = f.ClientSet - }) - ginkgo.It("Should successfully create then remove a static pod", func() { - nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 3) + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 3) framework.ExpectNoError(err) if len(nodes.Items) < 1 { framework.Failf("Test requires 1 Ready node, but there are none") @@ -88,8 +97,8 @@ var _ = ginkgo.Describe("Creating a static pod on a node", func() { nodeName := nodes.Items[0].Name podName := fmt.Sprintf("static-pod-%s", nodeName) - ginkgo.By("copying a pod.yaml file into the /etc/kubernetes/manifests dir of a node") - framework.Logf("creating %s on node %s", podName, nodeName) + ginkgo.By("creating static pod file") + var staticPodYaml = fmt.Sprintf(`apiVersion: v1 kind: Pod metadata: @@ -100,14 +109,14 @@ spec: - name: web image: %s command: ["/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait"] -`, f.Namespace.Name, agnhostImage) - createStaticPod(f, nodeName, staticPodYaml) - err = waitForPodRunningInNamespaceTimeout(f.ClientSet, podName, f.Namespace.Name, time.Second*30) +`, f.Namespace.Name, images.AgnHost()) + createStaticPod(nodeName, staticPodYaml) + err = waitForPodRunningInNamespaceTimeout(f.ClientSet, podName, f.Namespace.Name, time.Second*60) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By("Removing the pod file from the nodes /etc/kubernetes/manifests") framework.Logf("Removing %s from %s", podName, nodeName) removeStaticPodFile(nodeName, podFile) - err = e2epod.WaitForPodNotFoundInNamespace(context.TODO(), f.ClientSet, podName, f.Namespace.Name, time.Second*30) + err = e2epod.WaitForPodNotFoundInNamespace(context.TODO(), f.ClientSet, podName, f.Namespace.Name, time.Second*60) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) diff --git a/test/e2e/status_manager.go b/test/e2e/status_manager.go index e482bedf0f..b6e7a9bfeb 100644 --- a/test/e2e/status_manager.go +++ b/test/e2e/status_manager.go @@ -113,8 +113,8 @@ func checkEgressFirewallStatus(namespace string, empty bool, success bool, event return empty && output == "" || success && strings.Contains(output, "EgressFirewall Rules applied") } if eventually { - gomega.Eventually(checkStatus, 1*time.Second, 100*time.Millisecond).Should(gomega.BeTrue()) + gomega.Eventually(checkStatus, 5*time.Second, 100*time.Millisecond).Should(gomega.BeTrue()) } else { - gomega.Consistently(checkStatus, 1*time.Second, 100*time.Millisecond).Should(gomega.BeTrue()) + gomega.Consistently(checkStatus, 5*time.Second, 100*time.Millisecond).Should(gomega.BeTrue()) } } diff --git a/test/e2e/unidling.go b/test/e2e/unidling.go index 213c371f49..9566b3190f 100644 --- a/test/e2e/unidling.go +++ b/test/e2e/unidling.go @@ -10,6 +10,8 @@ import ( "sync" "time" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -68,7 +70,8 @@ var _ = ginkgo.Describe("Unidling", func() { // Add a backend pod to 
the service in one node ginkgo.By("creating a backend pod for the service " + serviceName) - serverPod := e2epod.NewAgnhostPod(namespace, "pod-backend", nil, nil, []v1.ContainerPort{{ContainerPort: 9376}}, "serve-hostname") + serverPodPort := infraprovider.Get().GetK8HostPort() + serverPod := e2epod.NewAgnhostPod(namespace, "pod-backend", nil, nil, []v1.ContainerPort{{ContainerPort: int32(serverPodPort)}}, "serve-hostname") serverPod.Labels = jig.Labels serverPod.Spec.NodeName = nodeName e2epod.NewPodClient(f).CreateSync(context.TODO(), serverPod) diff --git a/test/e2e/util.go b/test/e2e/util.go index 02ff57f775..aba6dcbc44 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -14,7 +14,12 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/images" + "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider" + infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -37,7 +42,6 @@ import ( ) const ( - ovnNamespace = "ovn-kubernetes" ovnNodeSubnets = "k8s.ovn.org/node-subnets" // ovnNodeZoneNameAnnotation is the node annotation name to store the node zone name. ovnNodeZoneNameAnnotation = "k8s.ovn.org/zone-name" @@ -45,15 +49,8 @@ const ( ovnGatewayMTUSupport = "k8s.ovn.org/gateway-mtu-support" ) -var containerRuntime = "docker" var singleNodePerZoneResult *bool -func init() { - if cr, found := os.LookupEnv("CONTAINER_RUNTIME"); found { - containerRuntime = cr - } -} - type IpNeighbor struct { Dst string `dst` Lladdr string `lladdr` @@ -117,7 +114,7 @@ func newAgnhostPod(namespace, name string, command ...string) *v1.Pod { Containers: []v1.Container{ { Name: name, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, @@ -138,7 +135,7 @@ func newLatestAgnhostPod(namespace, name string, command ...string) *v1.Pod { Containers: []v1.Container{ { Name: name, - Image: agnhostImageNew, + Image: images.AgnHost(), Command: command, }, }, @@ -160,7 +157,7 @@ func newAgnhostPodOnNode(name, nodeName string, labels map[string]string, comman Containers: []v1.Container{ { Name: name, - Image: agnhostImage, + Image: images.AgnHost(), Command: command, }, }, @@ -309,17 +306,11 @@ func externalIPServiceSpecFrom(svcName string, httpPort, updPort, clusterHTTPPor return res } -// pokeEndpoint leverages a container running the netexec command to send a "request" to a target running +// pokeEndpointViaExternalContainer leverages a container running the netexec command to send a "request" to a target running // netexec on the given target host / protocol / port. // Returns the response based on the provided "request". 
-func pokeEndpoint(namespace, clientContainer, protocol, targetHost string, targetPort int32, request string) string { - ipPort := net.JoinHostPort("localhost", "80") - cmd := []string{containerRuntime, "exec", clientContainer} - if len(namespace) != 0 { - // command is to be run inside a pod, not containerRuntime - cmd = []string{"exec", clientContainer, "--"} - } - +func pokeEndpointViaExternalContainer(externalContainer infraapi.ExternalContainer, protocol, targetHost string, targetPort int32, request string) string { + ipPort := net.JoinHostPort("localhost", externalContainer.GetPortStr()) // we leverage the dial command from netexec, that is already supporting multiple protocols curlCommand := strings.Split(fmt.Sprintf("curl -g -q -s http://%s/dial?request=%s&protocol=%s&host=%s&port=%d&tries=1", ipPort, @@ -327,16 +318,10 @@ func pokeEndpoint(namespace, clientContainer, protocol, targetHost string, targe protocol, targetHost, targetPort), " ") - - cmd = append(cmd, curlCommand...) var res string var err error - if len(namespace) != 0 { - res, err = e2ekubectl.RunKubectl(namespace, cmd...) - } else { - // command is to be run inside runtime container - res, err = runCommand(cmd...) - } + // command is to be run inside runtime container + res, err = infraprovider.Get().ExecExternalContainerCommand(externalContainer, curlCommand) framework.ExpectNoError(err, "failed to run command on external container") response, err := parseNetexecResponse(res) if err != nil { @@ -345,17 +330,54 @@ func pokeEndpoint(namespace, clientContainer, protocol, targetHost string, targe return "" } framework.ExpectNoError(err) + return response +} +// pokeEndpointViaPod returns the response based on the provided "request" which is executed from the pod podName. +func pokeEndpointViaPod(f *framework.Framework, namespace, podName, targetHost string, targetPort uint16, request string) string { + ipPort := net.JoinHostPort(targetHost, fmt.Sprintf("%d", targetPort)) + curlCommand := fmt.Sprintf("curl -g -q -s http://%s/%s", + ipPort, + request) + stdOut, stdErr, err := e2epod.ExecShellInPodWithFullOutput(context.Background(), f, podName, curlCommand) + framework.ExpectNoError(err, "failed to run command within pod") + if stdErr != "" { + framework.Failf("failed to run command within pod %s/%s, stdout: %q, stderr: %q", namespace, podName, stdOut, stdErr) + } + return stdOut +} + +// pokeEndpointViaNode leverages a k8 node running the netexec command to send a "request" to a target running +// netexec on the given target host / protocol / port. +// Returns the response based on the provided "request". 
+func pokeEndpointViaNode(nodeName, protocol, targetHost string, localPort, targetPort uint16, request string) string { + ipPort := net.JoinHostPort("localhost", fmt.Sprintf("%d", localPort)) + // we leverage the dial command from netexec, that is already supporting multiple protocols + curlCommand := []string{"curl", "-g", "-q", "-s", fmt.Sprintf("http://%s/dial?request=%s&protocol=%s&host=%s&port=%d&tries=1", + ipPort, + request, + protocol, + targetHost, + targetPort)} + res, err := infraprovider.Get().ExecK8NodeCommand(nodeName, curlCommand) + framework.ExpectNoError(err, "failed to run command on node %s", nodeName) + response, err := parseNetexecResponse(res) + if err != nil { + framework.Logf("FAILED Command was %s", curlCommand) + framework.Logf("FAILED Response was %v", res) + return "" + } + framework.ExpectNoError(err) return response } // wrapper logic around pokeEndpoint // contact the ExternalIP service until each endpoint returns its hostname and return true, or false otherwise -func pokeExternalIpService(clientContainerName, protocol, externalAddress string, externalPort int32, maxTries int, nodesHostnames sets.String) bool { +func pokeExternalIpService(externalContainer infraapi.ExternalContainer, protocol, externalAddress string, externalPort int32, maxTries int, nodesHostnames sets.String) bool { responses := sets.NewString() for i := 0; i < maxTries; i++ { - epHostname := pokeEndpoint("", clientContainerName, protocol, externalAddress, externalPort, "hostname") + epHostname := pokeEndpointViaExternalContainer(externalContainer, protocol, externalAddress, externalPort, "hostname") responses.Insert(epHostname) // each endpoint returns its hostname. By doing this, we validate that each ep was reached at least once. @@ -370,8 +392,7 @@ func pokeExternalIpService(clientContainerName, protocol, externalAddress string // run a few iterations to make sure that the hwaddr is stable // we will always run iterations + 1 in the loop to make sure that we have values // to compare -func isNeighborEntryStable(clientContainer, targetHost string, iterations int) bool { - cmd := []string{containerRuntime, "exec", clientContainer} +func isNeighborEntryStable(externalContainer infraapi.ExternalContainer, targetHost string, iterations int) bool { var hwAddrOld string var hwAddrNew string // used for reporting only @@ -381,14 +402,16 @@ func isNeighborEntryStable(clientContainer, targetHost string, iterations int) b // make sure that we do not get Operation not permitted for neighbor entry deletion, // ignore everything else for the delete and the ping // RTNETLINK answers: Operation not permitted would indicate missing Cap NET_ADMIN + primaryInfName := infraprovider.Get().ExternalContainerPrimaryInterfaceName() script := fmt.Sprintf( - "OUTPUT=$(ip neigh del %s dev eth0 2>&1); "+ + "OUTPUT=$(ip neigh del %s dev %s 2>&1); "+ "if [[ \"$OUTPUT\" =~ \"Operation not permitted\" ]]; then "+ "echo \"$OUTPUT\";"+ "else "+ "ping -c1 -W1 %s &>/dev/null; ip -j neigh; "+ "fi", targetHost, + primaryInfName, targetHost, ) command := []string{ @@ -396,12 +419,11 @@ func isNeighborEntryStable(clientContainer, targetHost string, iterations int) b "-c", script, } - cmd = append(cmd, command...) // run this for time of iterations + 1 to make sure that the entry is stable for i := 0; i <= iterations; i++ { // run the command - output, err := runCommand(cmd...) 
+ output, err := infraprovider.Get().ExecExternalContainerCommand(externalContainer, command) if err != nil { framework.ExpectNoError( fmt.Errorf("FAILED Command was: %s\nFAILED Response was: %v\nERROR is: %s", @@ -455,24 +477,15 @@ func isNeighborEntryStable(clientContainer, targetHost string, iterations int) b return true } -// curlInContainer leverages a container running the netexec command to send a request to a target running -// netexec on the given target host / protocol / port. +// wgetInExternalContainer issues a request to target host and port at endpoint. // Returns a pair of either result, nil or "", error in case of an error. -func curlInContainer(clientContainer, targetHost string, targetPort int32, endPoint string, maxTime int) (string, error) { - cmd := []string{containerRuntime, "exec", clientContainer} +func wgetInExternalContainer(externalContainer infraapi.ExternalContainer, targetHost string, targetPort int32, endPoint string, maxTime int) (string, error) { if utilnet.IsIPv6String(targetHost) { targetHost = fmt.Sprintf("[%s]", targetHost) } - - // we leverage the dial command from netexec, that is already supporting multiple protocols - curlCommand := strings.Split(fmt.Sprintf("curl --max-time %d http://%s:%d/%s", - maxTime, - targetHost, - targetPort, - endPoint), " ") - - cmd = append(cmd, curlCommand...) - return runCommand(cmd...) + return infraprovider.Get().ExecExternalContainerCommand(externalContainer, []string{ + "wget", fmt.Sprintf("http://%s:%d/%s", targetHost, targetPort, endPoint), "-O", "/dev/null", + }) } // parseNetexecResponse parses a json string of type '{"responses":"...", "errors":""}'. @@ -573,74 +586,6 @@ func getNodeStatus(node string) string { return status } -// Returns the container's ipv4 and ipv6 addresses IN ORDER -// related to the given network. -func getContainerAddressesForNetwork(container, network string) (string, string) { - ipv4Format := fmt.Sprintf("{{.NetworkSettings.Networks.%s.IPAddress}}", network) - ipv6Format := fmt.Sprintf("{{.NetworkSettings.Networks.%s.GlobalIPv6Address}}", network) - - ipv4, err := runCommand(containerRuntime, "inspect", "-f", ipv4Format, container) - if err != nil { - framework.Failf("failed to inspect external test container for its IPv4: %v", err) - } - ipv6, err := runCommand(containerRuntime, "inspect", "-f", ipv6Format, container) - if err != nil { - framework.Failf("failed to inspect external test container for its IPv4: %v", err) - } - return strings.TrimSuffix(ipv4, "\n"), strings.TrimSuffix(ipv6, "\n") -} - -// Returns the network's ipv4 and ipv6 CIDRs for the given network. 
-func getContainerNetworkCIDRs(network string) (string, string) { - output, err := runCommand(containerRuntime, "network", "inspect", network) - if err != nil { - framework.Failf("failed to inspect network %s: %v", network, err) - } - - // Parse the JSON output - var networks []map[string]interface{} - if err := json.Unmarshal([]byte(output), &networks); err != nil { - framework.Failf("failed to parse network inspect output: %v", err) - } - - if len(networks) == 0 { - framework.Failf("no network found with name %s", network) - } - - ipv4CIDR := "" - ipv6CIDR := "" - - if ipam, ok := networks[0]["IPAM"].(map[string]interface{}); ok { - if configs, ok := ipam["Config"].([]interface{}); ok { - for _, c := range configs { - if config, ok := c.(map[string]interface{}); ok { - if subnet, ok := config["Subnet"].(string); ok { - if strings.Contains(subnet, ":") { - ipv6CIDR = subnet - } else { - ipv4CIDR = subnet - } - } - } - } - } - } - - return ipv4CIDR, ipv6CIDR -} - -// Returns the container's MAC addresses -// related to the given network. -func getMACAddressesForNetwork(container, network string) string { - mac := fmt.Sprintf("{{.NetworkSettings.Networks.%s.MacAddress}}", network) - - macAddr, err := runCommand(containerRuntime, "inspect", "-f", mac, container) - if err != nil { - framework.Failf("failed to inspect external test container for its MAC: %v", err) - } - return strings.TrimSuffix(macAddr, "\n") -} - // waitClusterHealthy ensures we have a given number of ovn-k worker and master nodes, // as well as all nodes are healthy func waitClusterHealthy(f *framework.Framework, numControlPlanePods int, controlPlanePodName string) error { @@ -665,7 +610,7 @@ func waitClusterHealthy(f *framework.Framework, numControlPlanePods int, control return false, nil } - podClient := f.ClientSet.CoreV1().Pods(ovnNamespace) + podClient := f.ClientSet.CoreV1().Pods(deploymentconfig.Get().OVNKubernetesNamespace()) // Ensure all nodes are running and healthy podList, err := podClient.List(context.Background(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", @@ -856,7 +801,7 @@ func ExecCommandInContainerWithFullOutput(f *framework.Framework, namespace, pod func assertACLLogs(targetNodeName string, policyNameRegex string, expectedACLVerdict string, expectedACLSeverity string) (bool, error) { framework.Logf("collecting the ovn-controller logs for node: %s", targetNodeName) - targetNodeLog, err := runCommand([]string{containerRuntime, "exec", targetNodeName, "grep", "acl_log", ovnControllerLogPath}...) + targetNodeLog, err := infraprovider.Get().ExecK8NodeCommand(targetNodeName, []string{"grep", "acl_log", ovnControllerLogPath}) if err != nil { return false, fmt.Errorf("error accessing logs in node %s: %v", targetNodeName, err) } @@ -923,18 +868,12 @@ func patchService(c kubernetes.Interface, serviceName, serviceNamespace, jsonPat return nil } -// pokeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern -func pokeIPTableRules(clientContainer, pattern string) int { - cmd := []string{containerRuntime, "exec", clientContainer} - - ipv4Cmd := append(cmd, strings.Split("iptables-save -c", " ")...) - ipt4Rules, err := runCommand(ipv4Cmd...) - framework.ExpectNoError(err, "failed to get iptables rules from node %s", clientContainer) - - ipv6Cmd := append(cmd, strings.Split("ip6tables-save -c", " ")...) - ipt6Rules, err := runCommand(ipv6Cmd...) 
- framework.ExpectNoError(err, "failed to get ip6tables rules from node %s", clientContainer) - +// pokeNodeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern +func pokeNodeIPTableRules(nodeName, pattern string) int { + ipt4Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"iptables-save", "-c"}) + framework.ExpectNoError(err, "failed to get iptables rules from node %s", nodeName) + ipt6Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip6tables-save", "-c"}) + framework.ExpectNoError(err, "failed to get ip6tables rules from node %s", nodeName) iptRules := ipt4Rules + ipt6Rules framework.Logf("DEBUG: Dumping IPTRules %v", iptRules) numOfMatchRules := 0 @@ -950,12 +889,10 @@ func pokeIPTableRules(clientContainer, pattern string) int { // countNFTablesElements returns the number of nftables elements in the indicated set // of the "ovn-kubernetes" table. -func countNFTablesElements(clientContainer, name string) int { - cmd := []string{containerRuntime, "exec", clientContainer} - - nftCmd := append(cmd, "nft", "-j", "list", "set", "inet", "ovn-kubernetes", name) - nftElements, err := runCommand(nftCmd...) - framework.ExpectNoError(err, "failed to get nftables elements from node %s", clientContainer) +func countNFTablesElements(nodeName, name string) int { + nftCmd := []string{"nft", "-j", "list", "set", "inet", "ovn-kubernetes", name} + nftElements, err := infraprovider.Get().ExecK8NodeCommand(nodeName, nftCmd) + framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) framework.Logf("DEBUG: Dumping NFTElements %v", nftElements) // The output will look like @@ -1034,41 +971,33 @@ func wrappedTestFramework(basename string) *framework.Framework { testName := strings.Replace(ginkgo.CurrentSpecReport().LeafNodeText, " ", "_", -1) logDir := fmt.Sprintf("%s/e2e-dbs/%s-%s", logLocation, testName, f.UniqueName) - - var args []string - // grab all OVS and OVN dbs nodes, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) framework.ExpectNoError(err) for _, node := range nodes.Items { // ensure e2e-dbs directory with test case exists - args = []string{containerRuntime, "exec", node.Name, "mkdir", "-p", logDir} - _, err = runCommand(args...) + _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"mkdir", "-p", logDir}) framework.ExpectNoError(err) // Loop through potential OVSDB db locations for _, ovsdbLocation := range ovsdbLocations { - args = []string{containerRuntime, "exec", node.Name, "stat", fmt.Sprintf("%s/%s", ovsdbLocation, ovsdb)} - _, err = runCommand(args...) + _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"stat", fmt.Sprintf("%s/%s", ovsdbLocation, ovsdb)}) if err == nil { // node name is the same in kapi and docker - args = []string{containerRuntime, "exec", node.Name, "cp", "-f", fmt.Sprintf("%s/%s", ovsdbLocation, ovsdb), - fmt.Sprintf("%s/%s", logDir, fmt.Sprintf("%s-%s", node.Name, ovsdb))} - _, err = runCommand(args...) 
+ _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"cp", "-f", fmt.Sprintf("%s/%s", ovsdbLocation, ovsdb), + fmt.Sprintf("%s/%s", logDir, fmt.Sprintf("%s-%s", node.Name, ovsdb))}) framework.ExpectNoError(err) break // Stop the loop: the file is found and copied successfully } } // IC will have dbs on every node, but legacy mode wont, check if they exist - args = []string{containerRuntime, "exec", node.Name, "stat", fmt.Sprintf("%s/%s", dbLocation, dbs[0])} - _, err = runCommand(args...) + _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"stat", fmt.Sprintf("%s/%s", dbLocation, dbs[0])}) if err == nil { for _, db := range dbs { - args = []string{containerRuntime, "exec", node.Name, "cp", "-f", fmt.Sprintf("%s/%s", dbLocation, db), - fmt.Sprintf("%s/%s", logDir, db)} - _, err = runCommand(args...) - framework.ExpectNoError(err) + _, err = infraprovider.Get().ExecK8NodeCommand(node.Name, []string{"cp", "-f", fmt.Sprintf("%s/%s", dbLocation, db), + fmt.Sprintf("%s/%s", logDir, db)}) + framework.ExpectNoError(err, "copy DBs to file location must succeed") } } } @@ -1093,7 +1022,7 @@ func countACLLogs(targetNodeName string, policyNameRegex string, expectedACLVerd count := 0 framework.Logf("collecting the ovn-controller logs for node: %s", targetNodeName) - targetNodeLog, err := runCommand([]string{containerRuntime, "exec", targetNodeName, "cat", ovnControllerLogPath}...) + targetNodeLog, err := infraprovider.Get().ExecK8NodeCommand(targetNodeName, []string{"cat", ovnControllerLogPath}) if err != nil { return 0, fmt.Errorf("error accessing logs in node %s: %v", targetNodeName, err) } @@ -1122,7 +1051,7 @@ func countACLLogs(targetNodeName string, policyNameRegex string, expectedACLVerd func getTemplateContainerEnv(namespace, resource, container, key string) string { args := []string{"get", resource, "-o=jsonpath='{.spec.template.spec.containers[?(@.name==\"" + container + "\")].env[?(@.name==\"" + key + "\")].value}'"} - value := e2ekubectl.RunKubectlOrDie(ovnNamespace, args...) + value := e2ekubectl.RunKubectlOrDie(namespace, args...) return strings.Trim(value, "'") } @@ -1167,15 +1096,13 @@ func updateIPTablesRulesForNode(op, nodeName string, ipTablesArgs []string, ipv6 if ipv6 { iptables = "ip6tables" } - - args := []string{"docker", "exec", nodeName, iptables, "-v", "--check"} - _, err := runCommand(append(args, ipTablesArgs...)...) + _, err := infraprovider.Get().ExecK8NodeCommand(nodeName, append([]string{iptables, "-v", "--check"}, ipTablesArgs...)) // errors known to be equivalent to not found notFound1 := "No chain/target/match by that name" notFound2 := "does a matching rule exist in that chain?" notFound := err != nil && (strings.Contains(err.Error(), notFound1) || strings.Contains(err.Error(), notFound2)) if err != nil && !notFound { - framework.Failf("failed to check existance of %s rule on node %s: %v", iptables, nodeName, err) + framework.Failf("failed to check existence of %s rule on node %s: %v", iptables, nodeName, err) } if op == "delete" && notFound { // rule is not there @@ -1184,10 +1111,9 @@ func updateIPTablesRulesForNode(op, nodeName string, ipTablesArgs []string, ipv6 // rule is already there return } - - args = []string{"docker", "exec", nodeName, iptables, "--" + op} framework.Logf("%s %s rule: %q on node %s", op, iptables, strings.Join(ipTablesArgs, ","), nodeName) - _, err = runCommand(append(args, ipTablesArgs...)...) 
+ args := []string{iptables, "--" + op} + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, append(args, ipTablesArgs...)) if err != nil { framework.Failf("failed to update %s rule on node %s: %v", iptables, nodeName, err) } @@ -1204,12 +1130,12 @@ func randStr(n int) string { } func isIPv4Supported() bool { - val, present := os.LookupEnv("KIND_IPV4_SUPPORT") + val, present := os.LookupEnv("PLATFORM_IPV4_SUPPORT") return present && val == "true" } func isIPv6Supported() bool { - val, present := os.LookupEnv("KIND_IPV6_SUPPORT") + val, present := os.LookupEnv("PLATFORM_IPV6_SUPPORT") return present && val == "true" } @@ -1236,7 +1162,7 @@ func isLocalGWModeEnabled() bool { func singleNodePerZone() bool { if singleNodePerZoneResult == nil { args := []string{"get", "pods", "--selector=app=ovnkube-node", "-o", "jsonpath={.items[0].spec.containers[*].name}"} - containerNames := e2ekubectl.RunKubectlOrDie(ovnNamespace, args...) + containerNames := e2ekubectl.RunKubectlOrDie(deploymentconfig.Get().OVNKubernetesNamespace(), args...) result := true for _, containerName := range strings.Split(containerNames, " ") { if containerName == "ovnkube-node" { @@ -1285,19 +1211,17 @@ func routeToNode(nodeName string, ips []string, mtu int, add bool) error { } for _, ip := range ips { mask := 32 - ipCmd := []string{"ip"} + cmd := []string{"ip"} if utilnet.IsIPv6String(ip) { mask = 128 - ipCmd = []string{"ip", "-6"} + cmd = []string{"ip", "-6"} } var err error - cmd := []string{"docker", "exec", nodeName} - cmd = append(cmd, ipCmd...) cmd = append(cmd, "route", ipOp, fmt.Sprintf("%s/%d", ip, mask), "dev", "breth0") if mtu != 0 { cmd = append(cmd, "mtu", strconv.Itoa(mtu)) } - _, err = runCommand(cmd...) + _, err = infraprovider.Get().ExecK8NodeCommand(nodeName, cmd) if err != nil { return err } @@ -1309,11 +1233,7 @@ func routeToNode(nodeName string, ips []string, mtu int, add bool) error { func GetNodeIPv6LinkLocalAddressForEth0(nodeName string) (string, error) { // Command to get IPv6 link-local address for eth0 ipCmd := []string{"ip", "-6", "addr", "show", "dev", "eth0", "scope", "link"} - - cmd := []string{"docker", "exec", nodeName} - cmd = append(cmd, ipCmd...) - - output, err := runCommand(cmd...) + output, err := infraprovider.Get().ExecK8NodeCommand(nodeName, ipCmd) if err != nil { return "", fmt.Errorf("failed to get link-local address for eth0: %v", err) } @@ -1402,7 +1322,7 @@ func getGatewayMTUSupport(node *v1.Node) bool { } func isKernelModuleLoaded(nodeName, kernelModuleName string) bool { - out, err := runCommand(containerRuntime, "exec", nodeName, "lsmod") + out, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"lsmod"}) if err != nil { framework.Failf("failed to list kernel modules for node %s: %v", nodeName, err) } @@ -1488,9 +1408,19 @@ func waitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, p } func isDefaultNetworkAdvertised() bool { - podNetworkValue, err := runCommand("kubectl", "get", "ra", "default", "--template={{index .spec.advertisements 0}}") + podNetworkValue, err := e2ekubectl.RunKubectl("default", "get", "ra", "default", "--template={{index .spec.advertisements 0}}") if err != nil { return false } return strings.TrimSpace(string(podNetworkValue)) == "PodNetwork" } + +// getAgnHostHTTPPortBindFullCMD returns the full command for agnhost netexec server. Args must not be defined in Container spec. +func getAgnHostHTTPPortBindFullCMD(port uint16) []string { + return append([]string{"/agnhost"}, getAgnHostHTTPPortBindCMDArgs(port)...) 
+} + +// getAgnHostHTTPPortBindCMDArgs returns the arguments for the /agnhost binary +func getAgnHostHTTPPortBindCMDArgs(port uint16) []string { + return []string{"netexec", fmt.Sprintf("--http-port=%d", port)} +} diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index d7dc3b5806..cf9589e589 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -34,8 +34,8 @@ ipv4 pod" SKIPPED_TESTS="" -if [ "$KIND_IPV4_SUPPORT" == true ]; then - if [ "$KIND_IPV6_SUPPORT" == true ]; then +if [ "$PLATFORM_IPV4_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # No support for these features in dual-stack yet SKIPPED_TESTS="hybrid.overlay" else @@ -45,7 +45,7 @@ if [ "$KIND_IPV4_SUPPORT" == true ]; then fi fi -if [ "$KIND_IPV4_SUPPORT" == false ]; then +if [ "$PLATFORM_IPV4_SUPPORT" == false ]; then SKIPPED_TESTS+="\[IPv4\]" fi @@ -66,7 +66,7 @@ else e2e br-int NetFlow export validation" fi -if [ "$KIND_IPV6_SUPPORT" == true ]; then +if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then if [ "$SKIPPED_TESTS" != "" ]; then SKIPPED_TESTS+="|" fi @@ -91,7 +91,7 @@ fi if [ "$OVN_GATEWAY_MODE" == "local" ]; then # See https://github.com/ovn-org/ovn-kubernetes/labels/ci-ipv6 for details: - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then if [ "$SKIPPED_TESTS" != "" ]; then SKIPPED_TESTS+="|" fi @@ -115,6 +115,13 @@ if [ "$ENABLE_MULTI_NET" != "true" ]; then SKIPPED_TESTS+="Multi Homing" fi +if [ "$OVN_NETWORK_QOS_ENABLE" != "true" ]; then + if [ "$SKIPPED_TESTS" != "" ]; then + SKIPPED_TESTS+="|" + fi + SKIPPED_TESTS+="e2e NetworkQoS validation" +fi + # Only run Node IP/MAC address migration tests if they are explicitly requested IP_MIGRATION_TESTS="Node IP and MAC address migration" if [[ "${WHAT}" != "${IP_MIGRATION_TESTS}"* ]]; then diff --git a/test/scripts/e2e-kind.sh b/test/scripts/e2e-kind.sh index 238293a66d..2ec08b59ff 100755 --- a/test/scripts/e2e-kind.sh +++ b/test/scripts/e2e-kind.sh @@ -129,18 +129,18 @@ RA_SKIPPED_TESTS=" SKIPPED_TESTS=$SKIPPED_TESTS$IPV6_ONLY_TESTS # Either single stack IPV6 or dualstack -if [ "$KIND_IPV6_SUPPORT" == true ]; then +if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then SKIPPED_TESTS=$SKIPPED_TESTS$SINGLESTACK_IPV4_ONLY_TESTS fi # IPv6 Only, skip any IPv4 Only Tests -if [ "$KIND_IPV4_SUPPORT" == false ] && [ "$KIND_IPV6_SUPPORT" == true ]; then +if [ "$PLATFORM_IPV4_SUPPORT" == false ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then echo "IPv6 Only" SKIPPED_TESTS=$SKIPPED_TESTS$IPV4_ONLY_TESTS fi # If not DualStack, skip DualStack tests -if [ "$KIND_IPV4_SUPPORT" == false ] || [ "$KIND_IPV6_SUPPORT" == false ]; then +if [ "$PLATFORM_IPV4_SUPPORT" == false ] || [ "$PLATFORM_IPV6_SUPPORT" == false ]; then SKIPPED_TESTS=$SKIPPED_TESTS$DUALSTACK_ONLY_TESTS fi diff --git a/test/scripts/upgrade-ovn.sh b/test/scripts/upgrade-ovn.sh index 79d329a5b6..7e1efa0390 100755 --- a/test/scripts/upgrade-ovn.sh +++ b/test/scripts/upgrade-ovn.sh @@ -17,7 +17,7 @@ kubectl_wait_pods() { # Check that everything is fine and running. IPv6 cluster seems to take a little # longer to come up, so extend the wait time. OVN_TIMEOUT=900s - if [ "$KIND_IPV6_SUPPORT" == true ]; then + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then OVN_TIMEOUT=1400s fi if !
kubectl wait -n ovn-kubernetes --for=condition=ready pods --all --timeout=${OVN_TIMEOUT} ; then @@ -58,7 +58,6 @@ kubectl_wait_daemonset(){ exit 1 fi done - } kubectl_wait_deployment(){ @@ -158,9 +157,9 @@ create_ovn_kube_manifests() { set_default_ovn_manifest_params() { # Set default values - # kind configs - KIND_IPV4_SUPPORT=${KIND_IPV4_SUPPORT:-true} - KIND_IPV6_SUPPORT=${KIND_IPV6_SUPPORT:-false} + # kind configs + PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} + PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} OVN_HA=${OVN_HA:-false} OVN_ENABLE_OVNKUBE_IDENTITY=${OVN_ENABLE_OVNKUBE_IDENTITY:-true} # ovn configs @@ -210,8 +209,8 @@ set_default_ovn_manifest_params() { print_ovn_manifest_params() { echo "Using these parameters to build upgraded ovn-k manifests" echo "" - echo "KIND_IPV4_SUPPORT = $KIND_IPV4_SUPPORT" - echo "KIND_IPV6_SUPPORT = $KIND_IPV6_SUPPORT" + echo "PLATFORM_IPV4_SUPPORT = $PLATFORM_IPV4_SUPPORT" + echo "PLATFORM_IPV6_SUPPORT = $PLATFORM_IPV6_SUPPORT" echo "OVN_HA = $OVN_HA" echo "OVN_GATEWAY_MODE = $OVN_GATEWAY_MODE" echo "OVN_HYBRID_OVERLAY_ENABLE = $OVN_HYBRID_OVERLAY_ENABLE" @@ -239,23 +238,23 @@ print_ovn_manifest_params() { } set_cluster_cidr_ip_families() { - if [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == false ]; then + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == false ]; then IP_FAMILY="" NET_CIDR=$NET_CIDR_IPV4 SVC_CIDR=$SVC_CIDR_IPV4 echo "IPv4 Only Support: API_IP=$API_IP --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" - elif [ "$KIND_IPV4_SUPPORT" == false ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV4_SUPPORT" == false ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then IP_FAMILY="ipv6" NET_CIDR=$NET_CIDR_IPV6 SVC_CIDR=$SVC_CIDR_IPV6 echo "IPv6 Only Support: API_IP=$API_IP --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" - elif [ "$KIND_IPV4_SUPPORT" == true ] && [ "$KIND_IPV6_SUPPORT" == true ]; then + elif [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == true ]; then IP_FAMILY="dual" NET_CIDR=$NET_CIDR_IPV4,$NET_CIDR_IPV6 SVC_CIDR=$SVC_CIDR_IPV4,$SVC_CIDR_IPV6 echo "Dual Stack Support: API_IP=$API_IP --net-cidr=$NET_CIDR --svc-cidr=$SVC_CIDR" else - echo "Invalid setup. KIND_IPV4_SUPPORT and/or KIND_IPV6_SUPPORT must be true." + echo "Invalid setup. PLATFORM_IPV4_SUPPORT and/or PLATFORM_IPV6_SUPPORT must be true." exit 1 fi }
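Review note, not part of the patch: the e2e changes above replace direct "docker exec" calls against auxiliary containers with the infraprovider/infraapi abstraction added to test/e2e. A minimal sketch of that pattern follows, assuming the infraprovider and infraapi packages this patch imports into test/e2e/util.go; the helper name checkExternalConnectivity and its curl target are hypothetical and only illustrate the call shape.

package e2e

import (
	"fmt"

	"github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider"
	infraapi "github.com/ovn-org/ovn-kubernetes/test/e2e/infraprovider/api"
)

// checkExternalConnectivity resolves the provider's primary network, describes the
// external container by name and network, and runs a command through the provider
// instead of shelling out to the container runtime.
func checkExternalConnectivity(containerName, targetHost string, targetPort int) (string, error) {
	network, err := infraprovider.Get().PrimaryNetwork()
	if err != nil {
		return "", fmt.Errorf("failed to get primary provider network: %w", err)
	}
	externalContainer := infraapi.ExternalContainer{Name: containerName, Network: network}
	cmd := []string{"curl", "-g", "-q", "-s", fmt.Sprintf("http://%s:%d/hostname", targetHost, targetPort)}
	return infraprovider.Get().ExecExternalContainerCommand(externalContainer, cmd)
}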
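Commands that previously ran via "docker exec <node>" now go through ExecK8NodeCommand, keeping the tests container-runtime agnostic. A small sketch in the same style as pokeNodeIPTableRules and countNFTablesElements; the function name exampleListNodeRoutes is hypothetical and the snippet assumes it sits in the test/e2e package where infraprovider is already imported.

// exampleListNodeRoutes dumps the IPv6 routing table of a cluster node by running
// the command on the node through the infra provider rather than "docker exec".
func exampleListNodeRoutes(nodeName string) (string, error) {
	return infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip", "-6", "route", "show"})
}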
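setupIPv4NetworkForExternalClient and its IPv6 counterpart now also receive the service port so a matching FORWARD rule can be inserted during setup and removed during cleanup. A sketch of that rule handling built on the existing buildAndRunCommand helper; the wrapper name exampleOpenAndCloseLBForwarding is hypothetical.

// exampleOpenAndCloseLBForwarding inserts an ACCEPT rule for the load-balancer VIP
// and port in the FORWARD chain and then deletes the same rule, mirroring the
// setup/cleanup pair added in this patch.
func exampleOpenAndCloseLBForwarding(svcLoadBalancerIP string, svcLoadBalancerPort int) error {
	rule := fmt.Sprintf("-d %s -p tcp -m tcp --dport %d -j ACCEPT", svcLoadBalancerIP, svcLoadBalancerPort)
	if err := buildAndRunCommand("sudo iptables -t filter -I FORWARD " + rule); err != nil {
		return err
	}
	return buildAndRunCommand("sudo iptables -t filter -D FORWARD " + rule)
}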
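With KIND_IPV4_SUPPORT/KIND_IPV6_SUPPORT renamed to PLATFORM_IPV4_SUPPORT/PLATFORM_IPV6_SUPPORT, the Go tests read the IP-family configuration only through isIPv4Supported/isIPv6Supported. A short sketch of how a spec might gate on the renamed variables; the helper name skipUnlessDualStack is hypothetical and assumes the test/e2e package context.

// skipUnlessDualStack skips the current spec unless both IP families are enabled
// via PLATFORM_IPV4_SUPPORT and PLATFORM_IPV6_SUPPORT.
func skipUnlessDualStack() {
	if !isIPv4Supported() || !isIPv6Supported() {
		ginkgo.Skip("test requires a dual-stack platform (PLATFORM_IPV4_SUPPORT and PLATFORM_IPV6_SUPPORT)")
	}
}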