diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index edd80849b2..e28f96a64e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -16,10 +16,24 @@ env: UBUNTU_IMAGE_NAME: ovn-kube-ubuntu BUILDER_IMAGE: quay.io/projectquay/golang:1.24 jobs: - build: - name: Build Images - runs-on: ubuntu-latest + # Build Fedora image for each platform + build-fedora: + name: Build Fedora (${{ matrix.platform }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: true + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + - name: Check out code into the Go module directory uses: actions/checkout@v4 @@ -39,8 +53,8 @@ jobs: with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - + password: ${{ secrets.GITHUB_TOKEN }} + - name: Set up environment run: | export GOPATH=$(go env GOPATH) @@ -64,23 +78,19 @@ jobs: pushd dist/images echo "ref: ${BRANCH} commit: ${COMMIT}" > git_info popd - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - platforms: all - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v3 - name: Extract metadata (tags, labels) for fedora ovn-k image - id: meta-fedora + id: meta uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }} - name: Build and push Fedora based Docker image + id: build uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} @@ -89,23 +99,201 @@ jobs: push: true build-args: | BUILDER_IMAGE=${{ env.BUILDER_IMAGE }} - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta-fedora.outputs.tags }} - labels: ${{ steps.meta-fedora.outputs.labels }} + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha,scope=fedora-${{ env.PLATFORM_PAIR }} + cache-to: type=gha,mode=max,scope=fedora-${{ env.PLATFORM_PAIR }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-fedora-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + # Merge Fedora multi-platform images + merge-fedora: + name: Merge Fedora + runs-on: ubuntu-latest + needs: build-fedora + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-fedora-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the GH Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for fedora ovn-k image + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }}:${{ steps.meta.outputs.version }} + + # Build Ubuntu image for each platform + build-ubuntu: + name: Build Ubuntu (${{ matrix.platform }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: true + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go-controller/go.mod' + # Disabling cache to avoid warnings until these two issues are fixed + # https://github.com/actions/setup-go/issues/424 + # https://github.com/actions/setup-go/issues/403 + # cache-dependency-path: "**/*.sum" + cache: false + id: go + + - name: Log in to the GH Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up environment + run: | + export GOPATH=$(go env GOPATH) + echo "GOPATH=$GOPATH" >> $GITHUB_ENV + echo "$GOPATH/bin" >> $GITHUB_PATH + + - name: Build ovnkube-binaries copy to context + run: | + pushd go-controller + make + popd + + pushd dist/images + cp -r ../../go-controller/_output/go/bin/* . + popd + + - name: Generate git-info to write to image + run: | + BRANCH=$(git rev-parse --short "$GITHUB_SHA") + COMMIT=$(git rev-parse HEAD) + pushd dist/images + echo "ref: ${BRANCH} commit: ${COMMIT}" > git_info + popd + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 - name: Extract metadata (tags, labels) for ubuntu ovn-k image - id: meta-ubuntu + id: meta uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }} - name: Build and push Ubuntu based Docker image + id: build uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} context: ./dist/images file: ./dist/images/Dockerfile.ubuntu push: true - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta-ubuntu.outputs.tags }} - labels: ${{ steps.meta-ubuntu.outputs.labels }} + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha,scope=ubuntu-${{ env.PLATFORM_PAIR }} + cache-to: type=gha,mode=max,scope=ubuntu-${{ env.PLATFORM_PAIR }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-ubuntu-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + # Merge Ubuntu multi-platform images + merge-ubuntu: + name: Merge Ubuntu + runs-on: ubuntu-latest + needs: build-ubuntu + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-ubuntu-* + 
merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to the GH Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for ubuntu ovn-k image
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }}
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
\ No newline at end of file
diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
index b9fbdbd50e..7f691f6b3b 100644
--- a/.github/workflows/performance-test.yml
+++ b/.github/workflows/performance-test.yml
@@ -44,6 +44,7 @@ jobs:
 #          target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-mac-migration", "compact-mode", "serial"]
 #          shard-conformance: hybrid-overlay = multicast-enable = emptylb-enable = false
 #          control-plane: hybrid-overlay = multicast-enable = emptylb-enable = true
+#          perf-test: ["all", "kubelet-density-cni", "udn-density-l2-noPods", "cudn-density-l2-noPods", "udn-density-l2-pods"]
 #          ha: ["HA", "noHA"]
 #          gateway-mode: ["local", "shared"]
 #          ipfamily: ["ipv4", "ipv6", "dualstack"]
@@ -57,7 +58,10 @@
 #          network-segmentation : ["", "enable-network-segmentation"]
 #          traffic-flow-tests : ""
         include:
-          - {"target": "node-density-cni", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""}
+          - {"target": "control-plane", "perf-test": "kubelet-density-cni", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""}
+          - {"target": "control-plane", "perf-test": "udn-density-l2-noPods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""}
+          - {"target": "control-plane", "perf-test": "cudn-density-l2-noPods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""}
+          - {"target": "control-plane", "perf-test": "udn-density-l2-pods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""}
     env:
       ES_SERVER: "${{ secrets.PERF_DATASTORE }}"
      JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}-${{ matrix.ic }}"
@@ -66,11 +70,11 @@
       OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || 
matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" OVN_HA: "${{ matrix.ha == 'HA' }}" OVN_DISABLE_SNAT_MULTIPLE_GWS: "${{ matrix.disable-snat-multiple-gws == 'noSnatGW' }}" - KIND_INSTALL_METALLB: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || matrix.target == 'network-segmentation' }}" + KIND_INSTALL_METALLB: "false" OVN_GATEWAY_MODE: "${{ matrix.gateway-mode }}" OVN_SECOND_BRIDGE: "${{ matrix.second-bridge == '2br' }}" - ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' || matrix.target == 'kv-live-migration' || matrix.target == 'network-segmentation' || matrix.target == 'tools' || matrix.target == 'multi-homing-helm' || matrix.target == 'traffic-flow-test-only' || matrix.routeadvertisements != '' }}" - ENABLE_NETWORK_SEGMENTATION: "${{ matrix.target == 'network-segmentation' || matrix.network-segmentation == 'enable-network-segmentation' }}" + ENABLE_MULTI_NET: true + ENABLE_NETWORK_SEGMENTATION: true PLATFORM_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" PLATFORM_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" KIND_INSTALL_KUBEVIRT: "${{ matrix.target == 'kv-live-migration' }}" @@ -98,9 +102,17 @@ jobs: KIND_INSTALL_PROMETHEUS: "true" KIND_PROMETHEUS_INFRA_ONLY: "true" METRICS_IP: "127.0.0.1" + GH_TOKEN: "${{ secrets.GITHUB_TOKEN }}" steps: - uses: actions/checkout@v4 + # Debug session for the performance test + #- name: Setup tmate session + # id: tmate + # uses: mxschmitt/action-tmate@v3 + # with: + # detached: true + - name: Get PR info for issue comment if: github.event_name == 'issue_comment' id: pr_info @@ -197,6 +209,20 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. 
+ {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Download test-image-pr uses: actions/download-artifact@v4 with: @@ -281,7 +307,7 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: prometheus-install-logs + name: prometheus-install-logs-${{ matrix.perf-test }}-${{ github.run_id }} path: prometheus-install.log retention-days: 7 @@ -290,13 +316,8 @@ jobs: curl -L https://github.com/kube-burner/kube-burner/releases/download/v2.1.0/kube-burner-V2.1.0-linux-x86_64.tar.gz | tar xz chmod +x kube-burner sudo mv kube-burner /usr/local/bin/ - echo "KUBE_BURNER_VERSION=${KUBE_BURNER_VERSION}" >> $GITHUB_ENV - - name: git clone kube-burner repo - run: | - git clone http://github.com/kube-burner/kube-burner - - - name: Run kube-burner kubelet-density test + - name: "Run kube-burner ${{ matrix.perf-test }} workload" timeout-minutes: 120 run: | kind get kubeconfig > kconfig @@ -308,121 +329,71 @@ jobs: # Make sure the port-forward is up prior to running the workload sleep 30 - cp ./contrib/perf/metric-endpoint.yml kube-burner/examples/workloads/kubelet-density-cni - cp -f ./contrib/perf/workloads/kubelet-density-cni.yml kube-burner/examples/workloads/kubelet-density-cni/ - cp ./contrib/perf/performance-meta.yml kube-burner/examples/workloads/kubelet-density-cni - cp ./contrib/perf/metric-endpoint-local.yml kube-burner/examples/workloads/kubelet-density-cni - cp ./contrib/perf/metrics.yml kube-burner/examples/workloads/kubelet-density-cni - - cd kube-burner/examples/workloads/kubelet-density-cni + cd contrib/perf #Generate metadata. 
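          # envsubst expands the exported environment variables referenced in the
          # template (e.g. ES_SERVER, JOB_NAME); the rendered perf-meta.yml is then
          # attached to the indexed results via --user-metadata below.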
envsubst < performance-meta.yml > perf-meta.yml if [[ -z "${ES_SERVER}" ]]; then - kube-burner init --config kubelet-density-cni.yml -e metric-endpoint-local.yml --user-metadata perf-meta.yml + kube-burner init --config workloads/${{ matrix.perf-test }}.yml -e metric-endpoint-local.yml --user-metadata perf-meta.yml else - kube-burner init --config kubelet-density-cni.yml -e metric-endpoint.yml --user-metadata perf-meta.yml + kube-burner init --config workloads/${{ matrix.perf-test }}.yml -e metric-endpoint.yml --user-metadata perf-meta.yml fi - mkdir /tmp/pprof-data - cp pprof-data/* /tmp/pprof-data - - - name: Export kube-burner data - if: always() - run: | - mkdir -p /tmp/kube-burner - cp -r kube-burner/* /tmp/kube-burner/ + mkdir -p /tmp/${{ matrix.perf-test }}/pprof-data + mkdir -p /tmp/${{ matrix.perf-test }}/perf + cp -r pprof-data/* /tmp/${{ matrix.perf-test }}/pprof-data + cp -r * /tmp/${{ matrix.perf-test }}/perf - name: Generate performance report - if: always() + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} run: | - # Change to the kube-burner metrics directory - cd kube-burner/examples/workloads/kubelet-density-cni - - # Generate the performance report (without posting comment) - python3 ../../../../contrib/perf/generate_perf_report.py \ - --metrics-dir metrics/ \ - --output performance_report.md \ - --title "OVN-Kubernetes Performance Test Results - Run ${{ github.run_id }}" + cd contrib/perf + # Generate the performance report + python3 generate_perf_report.py \ + --workload ${{ matrix.perf-test }} \ + --metrics-dir /tmp/${{ matrix.perf-test }}/perf/metrics/ \ + --output /tmp/${{ matrix.perf-test }}/performance_report.md \ + --title "OVN-Kubernetes Performance Test Results - Run ${{ github.run_id }}" \ + --pr-number ${{ github.event_name == 'issue_comment' && github.event.issue.number || github.event_name == 'pull_request' && github.event.pull_request.number || github.event_name == 'workflow_dispatch' && '' }} \ + --github-comment echo "Performance report generated successfully" - cat performance_report.md + cat /tmp/${{ matrix.perf-test }}/performance_report.md - - name: Post performance report as PR comment - id: post_comment - if: always() && (github.event_name == 'pull_request' || github.event_name == 'issue_comment') - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const path = 'kube-burner/examples/workloads/kubelet-density-cni/performance_report.md'; - - if (fs.existsSync(path)) { - const report = fs.readFileSync(path, 'utf8'); - - // Get the issue/PR number based on event type - const issueNumber = context.eventName === 'pull_request' ? 
- context.payload.pull_request.number : - context.payload.issue.number; - - try { - await github.rest.issues.createComment({ - issue_number: issueNumber, - owner: context.repo.owner, - repo: context.repo.repo, - body: report - }); - console.log('Performance report posted as PR comment'); - return { success: true }; - } catch (error) { - console.error('Failed to post PR comment:', error.message); - return { success: false }; - } - } else { - console.log('Performance report file not found'); - return { success: false }; - } - - - name: Upload performance report as fallback - if: always() && (steps.post_comment.outcome == 'failure' || steps.post_comment.outputs.result == '{"success":false}' || (github.event_name != 'pull_request' && github.event_name != 'issue_comment')) + - name: Upload performance report + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: performance-report-${{ github.run_id }} - path: kube-burner/examples/workloads/kubelet-density-cni/performance_report.md + name: ${{ matrix.perf-test }}-performance-report-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/performance_report.md - name: Upload pprof data - if: always() + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: pprof-${{ github.run_id }} - path: /tmp/pprof-data + name: ${{ matrix.perf-test }}-pprof-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/pprof-data - - - name: Upload kube-burner data - if: always() + - name: Upload performance test data + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: kube-burner-performance-job-${{ github.run_id }} - path: /tmp/kube-burner - - - name: Runner Diagnostics - if: always() - uses: ./.github/actions/diagnostics + name: ${{ matrix.perf-test }}-performance-test-data-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }} - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + mkdir -p /tmp/${{ matrix.perf-test }}/logs + kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/${{ matrix.perf-test }}/logs - name: Upload kind logs if: always() uses: actions/upload-artifact@v4 with: - name: kind-logs-performance-job-${{ github.run_id }} - path: /tmp/kind/logs - + name: ${{ matrix.perf-test }}-kind-logs-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/logs build-pr: name: Build-PR @@ -430,7 +401,11 @@ jobs: if: | (github.event_name != 'issue_comment') || (github.event.issue.pull_request && - contains(github.event.comment.body, '/perf-test node-density-cni')) || + contains(github.event.comment.body, '/perf-test kubelet-density-cni')) || + (github.event.issue.pull_request && + contains(github.event.comment.body, '/perf-test udn-density-l2-noPods')) || + (github.event.issue.pull_request && + contains(github.event.comment.body, '/perf-test cudn-density-l2-noPods')) || (github.event_name == 'workflow_dispatch') steps: - name: Restore PR image cache diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 31e3bfa6dd..e70c2d9f5d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -355,9 +355,24 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_BASE_TAR} && rm -rf ${CI_IMAGE_BASE_TAR} + docker images || true - name: kind setup run: | @@ -370,8 +385,7 @@ jobs: - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + ./contrib/export-kind-logs.sh set -x docker ps -a docker exec ovn-control-plane crictl images @@ -417,9 +431,7 @@ jobs: - name: Export kind logs if: always() - run: | - mkdir -p /tmp/kind/logs-kind-pr-branch - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs-kind-pr-branch + run: ./contrib/export-kind-logs.sh /tmp/kind/logs-kind-pr-branch - name: Upload kind logs if: always() @@ -457,6 +469,8 @@ jobs: - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default"} - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default"} + - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "no-overlay": "true"} + - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "no-overlay": "true"} - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "control-plane", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-disabled"} @@ -488,6 +502,8 @@ jobs: - {"target": "bgp", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "dns-name-resolver": 
"enable-dns-name-resolver"} - {"target": "bgp", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "dns-name-resolver": "enable-dns-name-resolver"} - {"target": "bgp", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv6", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation"} + - {"target": "bgp-no-overlay-helm", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "no-overlay": "true"} + - {"target": "bgp-no-overlay", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "no-overlay": "true"} - {"target": "bgp-loose-isolation", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "advertised-udn-isolation-mode": "loose"} - {"target": "traffic-flow-test-only","ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "traffic-flow-tests": "1-24", "network-segmentation": "enable-network-segmentation"} - {"target": "tools", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "network-segmentation": "enable-network-segmentation"} @@ -496,8 +512,8 @@ jobs: env: JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}-${{ matrix.ic }}" OVN_HYBRID_OVERLAY_ENABLE: ${{ (matrix.target == 'control-plane' || matrix.target == 'control-plane-helm') && (matrix.ipfamily == 'ipv4' || matrix.ipfamily == 'dualstack' ) }} - OVN_MULTICAST_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') || matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" - OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" + OVN_MULTICAST_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') || startsWith(matrix.target, 'bgp') }}" + OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'bgp') }}" OVN_HA: "${{ matrix.ha == 'HA' }}" OVN_DISABLE_SNAT_MULTIPLE_GWS: "${{ matrix.disable-snat-multiple-gws == 'noSnatGW' }}" KIND_INSTALL_METALLB: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') }}" @@ -514,7 +530,7 @@ jobs: KIND_NUM_WORKER: 
"${{ matrix.num-workers }}" KIND_NUM_NODES_PER_ZONE: "${{ matrix.num-nodes-per-zone }}" OVN_DISABLE_FORWARDING: "${{ matrix.forwarding == 'disable-forwarding' }}" - USE_HELM: "${{ matrix.target == 'control-plane-helm' || matrix.target == 'multi-homing-helm' }}" + USE_HELM: "${{ matrix.target == 'control-plane-helm' || matrix.target == 'multi-homing-helm' || matrix.target == 'bgp-no-overlay-helm' }}" OVN_ENABLE_DNSNAMERESOLVER: "${{ matrix.dns-name-resolver == 'enable-dns-name-resolver' }}" OVN_NETWORK_QOS_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' }}" TRAFFIC_FLOW_TESTS: "${{ matrix.traffic-flow-tests }}" @@ -529,6 +545,7 @@ jobs: OVN_UNPRIVILEGED_MODE: "${{ matrix.cni-mode == 'unprivileged' }}" MULTI_POD_SUBNET: true DYNAMIC_UDN_ALLOCATION: "${{ matrix.target == 'network-segmentation-dynamic' }}" + ENABLE_NO_OVERLAY: "${{ matrix.no-overlay == 'true' }}" steps: - name: Check out code into the Go module directory uses: actions/checkout@v4 @@ -632,9 +649,24 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true - name: kind setup timeout-minutes: 30 @@ -656,7 +688,7 @@ jobs: # set 3 hours for control-plane tests as these might take a while # give 10m extra to give ginkgo chance to timeout before github so that we # get its output - timeout-minutes: ${{ matrix.target == 'bgp-loose-isolation' && 190 || matrix.target == 'bgp' && 190 || matrix.target == 'control-plane' && 190 || matrix.target == 'control-plane-helm' && 190 || matrix.target == 'external-gateway' && 190 || startsWith(matrix.target, 'network-segmentation') && 190 || 130 }} + timeout-minutes: ${{ startsWith(matrix.target, 'bgp') && 190 || matrix.target == 'control-plane' && 190 || matrix.target == 'control-plane-helm' && 190 || matrix.target == 'external-gateway' && 190 || startsWith(matrix.target, 'network-segmentation') && 190 || 130 }} run: | # used by e2e diagnostics package export OVN_IMAGE="ovn-daemonset-fedora:pr" @@ -681,7 +713,7 @@ jobs: elif [[ "${{ matrix.target }}" == network-segmentation* ]]; then make -C test control-plane WHAT="Network Segmentation" make -C test control-plane WHAT="ClusterNetworkConnect" - elif [ "${{ matrix.target }}" == "bgp" ] || [ "${{ matrix.target }}" == "bgp-loose-isolation" ]; then + elif [[ "${{ matrix.target }}" == bgp* ]]; then make -C test control-plane elif [ "${{ matrix.target }}" == "serial" ]; then # Run only Serial tests with ginkgo focus @@ -712,8 +744,7 @@ jobs: - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + ./contrib/export-kind-logs.sh if [ -n "${TRAFFIC_FLOW_TESTS}" ]; then mv -v /tmp/{,kind/logs/}traffic_flow_test_result.json ||: fi @@ -790,9 +821,25 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for 
https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true + - name: kind IPv4 setup run: | @@ -825,9 +872,7 @@ jobs: - name: Export kind logs if: always() - run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + run: ./contrib/export-kind-logs.sh - name: Upload kind logs if: always() diff --git a/ADOPTERS.md b/ADOPTERS.md index 0254f998c3..784dde2030 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -5,6 +5,8 @@ 1. Red Hat, Inc. (Uses OVN-Kubernetes as their default CNI in OpenShift product) 2. NVIDIA (Uses OVN-Kubernetes in their production environments) 3. Internet Initiative Japan Inc. (Uses OVN-Kubernetes in their on-premise Kubernetes platform) +4. SAIC Motor Corp. Ltd (Uses OVN-Kubernetes as a networking solution to build a multi-tenant private cloud) +5. Nutanix (Builds Flow CNI on OVN-Kubernetes, integrated with Nutanix Flow and VPC networking) ## Projects diff --git a/contrib/export-kind-logs.sh b/contrib/export-kind-logs.sh new file mode 100755 index 0000000000..4db2230ee6 --- /dev/null +++ b/contrib/export-kind-logs.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Export kind cluster logs and collect coredump binaries +# Usage: ./export-kind-logs.sh [logs_dir] +# Default logs_dir: /tmp/kind/logs + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/kind-common.sh" + +# Don't create cluster or delete kubeconfig - we're just exporting logs +KIND_CREATE=false +set_common_default_params + +export_logs "$@" diff --git a/contrib/kind-common b/contrib/kind-common.sh similarity index 67% rename from contrib/kind-common rename to contrib/kind-common.sh index c030826c08..763c63e3f0 100644 --- a/contrib/kind-common +++ b/contrib/kind-common.sh @@ -14,6 +14,9 @@ case $(uname -m) in aarch64) ARCH="arm64" ;; esac +# Directory for coredump collection (used by setup_coredumps and collect_coredump_binaries) +readonly COREDUMP_DIR="/tmp/kind/logs/coredumps" + if_error_exit() { ########################################################################### # Description: # @@ -33,12 +36,206 @@ if_error_exit() { } set_common_default_params() { + # KIND/cluster params + KIND_CREATE=${KIND_CREATE:-true} KIND_IMAGE=${KIND_IMAGE:-kindest/node} + KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} K8S_VERSION=${K8S_VERSION:-v1.34.0} KIND_SETTLE_DURATION=${KIND_SETTLE_DURATION:-30} + KIND_CONFIG=${KIND_CONFIG:-${DIR}/kind.yaml.j2} + KIND_LOCAL_REGISTRY=${KIND_LOCAL_REGISTRY:-false} + KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} + KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} + KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} + KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} + KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} + OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} + # Setup KUBECONFIG patch based on cluster-name + export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} + # Scrub any existing kubeconfigs at the path + if [ 
"${KIND_CREATE}" == true ]; then + rm -f "${KUBECONFIG}" + fi + + # Image/source code params + OVN_IMAGE=${OVN_IMAGE:-local} + OVN_REPO=${OVN_REPO:-""} + OVN_GITREF=${OVN_GITREF:-""} + + # Subnet params + # Input not currently validated. Modify outside script at your own risk. + # These are the same values defaulted to in KIND code (kind/default.go). + # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host + # so it needs to use a larger subnet + # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 + MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} + MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} + NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} + NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} + MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} + if [ "$MULTI_POD_SUBNET" == true ]; then + NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" + NET_CIDR_IPV6="fd00:10:243::/63/64,fd00:10:244::/48" + fi + NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} + SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} + SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} + JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} + JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} + TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} + TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} + METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} + METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} + PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} + PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} + + # Feature params + OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} + OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} + OVN_HA=${OVN_HA:-false} + ADVERTISE_DEFAULT_NETWORK=${ADVERTISE_DEFAULT_NETWORK:-false} + ADVERTISED_UDN_ISOLATION_MODE=${ADVERTISED_UDN_ISOLATION_MODE:-strict} + BGP_SERVER_NET_SUBNET_IPV4=${BGP_SERVER_NET_SUBNET_IPV4:-172.26.0.0/16} + BGP_SERVER_NET_SUBNET_IPV6=${BGP_SERVER_NET_SUBNET_IPV6:-fc00:f853:ccd:e796::/64} + OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} + OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} + OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} + OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} + ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} + METRICS_IP=${METRICS_IP:-""} + OVN_ALLOW_ICMP_NETPOL=${OVN_ALLOW_ICMP_NETPOL:-false} + OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} + if [ "$OVN_COMPACT_MODE" == true ]; then + KIND_NUM_WORKER=0 + fi + + KIND_NUM_MASTER=1 + if [ "$OVN_HA" == true ]; then + KIND_NUM_MASTER=3 + KIND_NUM_WORKER=${KIND_NUM_WORKER:-0} + else + KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} + fi + + OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} + if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then + echo "Compact mode cannot be used together with Interconnect" + exit 1 + fi + if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then + KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} + + TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) + if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then + echo "(Total k8s nodes / number of nodes per zone) should be zero" + exit 1 + fi + else + KIND_NUM_NODES_PER_ZONE=0 + fi + + ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} + ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} + if [ "$ENABLE_NETWORK_SEGMENTATION" == true ] && [ "$ENABLE_MULTI_NET" != true ]; 
then
+    echo "Network segmentation (UDN) requires multi-network to be enabled (-mne)"
+    exit 1
+  fi
+
+  ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false}
+  if [[ $ENABLE_NETWORK_CONNECT == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then
+    echo "Network connect requires network-segmentation to be enabled (-nse)"
+    exit 1
+  fi
+
+  DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false}
+  if [[ $DYNAMIC_UDN_ALLOCATION == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then
+    echo "Dynamic UDN allocation requires network-segmentation to be enabled (-nse)"
+    exit 1
+  fi
+  DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-120s}
+
+  ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false}
+  if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then
+    echo "Preconfigured UDN addresses require network-segmentation to be enabled (-nse)"
+    exit 1
+  fi
+  if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $OVN_ENABLE_INTERCONNECT != true ]]; then
+    echo "Preconfigured UDN addresses require interconnect to be enabled (-ic)"
+    exit 1
+  fi
+
+  ENABLE_ROUTE_ADVERTISEMENTS=${ENABLE_ROUTE_ADVERTISEMENTS:-false}
+  if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then
+    echo "Route advertisements require multi-network to be enabled (-mne)"
+    exit 1
+  fi
+  if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != true ]; then
+    echo "Route advertisements require interconnect to be enabled (-ic)"
+    exit 1
+  fi
+
+  ENABLE_EVPN=${ENABLE_EVPN:-false}
+  if [ "$ENABLE_EVPN" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then
+    echo "EVPN requires route advertisements to be enabled (-rae)"
+    exit 1
+  fi
+
+  ENABLE_NO_OVERLAY=${ENABLE_NO_OVERLAY:-false}
+  if [ "$ENABLE_NO_OVERLAY" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then
+    echo "No-overlay mode requires route advertisements to be enabled (-rae)"
+    exit 1
+  fi
+  if [ "$ENABLE_NO_OVERLAY" == true ] && [ "$ADVERTISE_DEFAULT_NETWORK" != true ]; then
+    echo "No-overlay mode requires advertising the default network (-adv)"
+    exit 1
+  fi
+
+  if [ "$ENABLE_NO_OVERLAY" == true ]; then
+    # Set default MTU for no-overlay mode (1500) if not already set
+    OVN_MTU=${OVN_MTU:-1500}
+  else
+    # Set default MTU for overlay mode (1400) if not already set
+    OVN_MTU=${OVN_MTU:-1400}
+  fi
+}
+
+set_ovn_image() {
+  if [ "${KIND_LOCAL_REGISTRY:-false}" == true ]; then
+    OVN_IMAGE="localhost:5000/ovn-daemonset-fedora:latest"
+  else
+    OVN_IMAGE="localhost/ovn-daemonset-fedora:dev"
+  fi
+}
+
+build_ovn_image() {
+  local push_args=""
+  if [ "$OCI_BIN" == "podman" ]; then
+    # docker doesn't perform a tls check by default, only podman does; hence we need to disable it for podman.
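+    # (the kind-local registry at localhost:5000 is served over plain HTTP, so
+    # podman pushes to it would otherwise fail TLS verification)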
+ push_args="--tls-verify=false" + fi + + if [ "$OVN_IMAGE" == local ]; then + set_ovn_image + + # Build image + make -C ${DIR}/../dist/images IMAGE="${OVN_IMAGE}" OVN_REPO="${OVN_REPO}" OVN_GITREF="${OVN_GITREF}" OCI_BIN="${OCI_BIN}" fedora-image + + # store in local registry + if [ "$KIND_LOCAL_REGISTRY" == true ];then + echo "Pushing built image to local $OCI_BIN registry" + $OCI_BIN push $push_args "$OVN_IMAGE" + fi + # We should push to local registry if image is not remote + elif [[ -n "${OVN_IMAGE}" && "${KIND_LOCAL_REGISTRY}" == true && "${OVN_IMAGE}" != */* ]]; then + local local_registry_ovn_image="localhost:5000/${OVN_IMAGE}" + $OCI_BIN tag "$OVN_IMAGE" $local_registry_ovn_image + OVN_IMAGE=$local_registry_ovn_image + $OCI_BIN push $push_args "$OVN_IMAGE" + fi } run_kubectl() { + kind export kubeconfig --name ${KIND_CLUSTER_NAME} local retries=0 local attempts=10 while true; do @@ -542,6 +739,71 @@ build_dnsnameresolver_images() { build_image /tmp/coredns-ocp-dnsnameresolver/operator ${DNSNAMERESOLVER_OPERATOR} Dockerfile } +check_common_dependencies() { + if ! command_exists curl ; then + echo "Dependency not met: Command not found 'curl'" + exit 1 + fi + + if ! command_exists kubectl ; then + echo "'kubectl' not found, installing" + setup_kubectl_bin + fi + + if ! command_exists kind ; then + echo "Dependency not met: Command not found 'kind'" + exit 1 + fi + + local kind_min="0.27.0" + local kind_cur + kind_cur=$(kind version -q) + if [ "$(echo -e "$kind_min\n$kind_cur" | sort -V | head -1)" != "$kind_min" ]; then + echo "Dependency not met: expected kind version >= $kind_min but have $kind_cur" + exit 1 + fi + + if ! command_exists jq ; then + echo "Dependency not met: Command not found 'jq'" + exit 1 + fi + + if ! command_exists awk ; then + echo "Dependency not met: Command not found 'awk'" + exit 1 + fi + + if ! command_exists jinjanate ; then + if ! command_exists pipx ; then + echo "Dependency not met: 'jinjanator' not installed and cannot install with 'pipx'" + exit 1 + fi + echo "'jinjanate' not found, installing with 'pipx'" + install_jinjanator_renderer + fi + + if ! command_exists docker && ! command_exists podman; then + echo "Dependency not met: Neither docker nor podman found" + exit 1 + fi + + if command_exists podman && ! command_exists skopeo; then + echo "Dependency not met: skopeo not installed. Run the following command to install it: 'sudo dnf install skopeo'" + exit 1 + fi +} + +install_jinjanator_renderer() { + # ensure jinjanator renderer installed + pipx install jinjanator[yaml] + pipx ensurepath --force >/dev/null + export PATH=~/.local/bin:$PATH +} + +install_ovn_image() { + install_image "${OVN_IMAGE}" +} + # install_image accepts the image name along with the tag as an argument and installs it. install_image() { # If local registry is being used push image there for consumption by kind cluster @@ -822,7 +1084,7 @@ destroy_bgp() { fi } -install_ffr_k8s() { +install_frr_k8s() { echo "Installing frr-k8s ..." 
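   # Fetch the frr-k8s sources before customizing and applying the manifests below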
clone_frr @@ -841,8 +1103,8 @@ install_ffr_k8s() { if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Find all line numbers where the IPv4 prefix is defined IPv6_LINE=" - prefix: ${BGP_SERVER_NET_SUBNET_IPV6}" - # Process each occurrence of the IPv4 prefix - for LINE_NUM in $(grep -n "prefix: ${BGP_SERVER_NET_SUBNET_IPV4}" receive_filtered.yaml | cut -d ':' -f 1); do + # Process each occurrence of the IPv4 prefix in reverse order to avoid line number shifting + for LINE_NUM in $(grep -n "prefix: ${BGP_SERVER_NET_SUBNET_IPV4}" receive_filtered.yaml | cut -d ':' -f 1 | sort -rn); do # Insert the IPv6 prefix after each IPv4 prefix line sed -i "${LINE_NUM}a\\${IPv6_LINE}" receive_filtered.yaml done @@ -923,18 +1185,18 @@ interconnect_arg_check() { setup_coredumps() { # Setup core dump collection # - # Core dumps will be saved on the HOST at /tmp/kind/logs/coredumps (not inside containers) + # Core dumps will be saved on the HOST at $COREDUMP_DIR (not inside containers) # because kernel.core_pattern is a kernel-level setting shared across all containers. # # - Using a pipe instead of a file path avoids needing to mount - # /tmp/kind/logs/coredumps into every container that might crash - # - The pipe executes in the host's namespace, so /tmp/kind/logs/coredumps + # $COREDUMP_DIR into every container that might crash + # - The pipe executes in the host's namespace, so $COREDUMP_DIR # automatically refers to the host path # - # Location: /tmp/kind/logs is used to ensure coredumps are exported in CI + # Location: COREDUMP_DIR is under /tmp/kind/logs to ensure coredumps are exported in CI # Use container exec to avoid asking for root permissions - mkdir -p "/tmp/kind/logs/coredumps" + mkdir -p "$COREDUMP_DIR" ulimit -c unlimited for node in $(kind get nodes --name "${KIND_CLUSTER_NAME}"); do # Core dump filename pattern variables: @@ -942,6 +1204,257 @@ setup_coredumps() { # %e - executable filename # %h - hostname (container hostname) # %s - signal number that caused dump - ${OCI_BIN} exec "$node" sysctl -w kernel.core_pattern="|/bin/dd of=/tmp/kind/logs/coredumps/core.%P.%e.%h.%s bs=1M status=none" + ${OCI_BIN} exec "$node" sysctl -w kernel.core_pattern="|/bin/dd of=${COREDUMP_DIR}/core.%P.%e.%h.%s bs=1M status=none" + done +} + +wait_for_coredumps() { + # Wait for any in-progress coredump writes to complete + # The kernel pipes coredumps to dd processes, which can take 30+ seconds for large Go binaries + # + # Challenge: Go's crash handling (printing stack traces for all goroutines) takes + # several seconds BEFORE it calls abort() and the kernel starts the coredump. + # So we can't just check for dd processes - we need to wait for potential crashes + # to fully materialize. + + local max_wait=120 # Maximum wait time in seconds + local initial_wait=15 # Initial wait for Go crash handling to complete + local waited=0 + + if [ ! -d "$COREDUMP_DIR" ]; then + return 0 + fi + + # Record initial coredump count + local initial_count + initial_count=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null | wc -l || echo 0) + echo "Checking for in-progress coredump writes (initial count: $initial_count)..." + + # Initial wait: Go's crash handling (printing goroutine stack traces) can take + # 10+ seconds before abort() is called and the kernel starts the coredump + echo "Waiting ${initial_wait}s for any pending crash handling to complete..." 
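+  # After this settle period, the loop below polls for dd writers spawned by
+  # the kernel.core_pattern pipe configured in setup_coredumps.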
+ sleep "$initial_wait" + waited=$initial_wait + + while [ $waited -lt $max_wait ]; do + # Check for dd processes writing to the coredump directory + local dd_procs + dd_procs=$(pgrep -f "dd of=${COREDUMP_DIR}" 2>/dev/null || true) + + # Check current coredump count + local current_count + current_count=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null | wc -l || echo 0) + + if [ -z "$dd_procs" ]; then + # No dd processes running + if [ "$current_count" -gt "$initial_count" ]; then + echo "New coredumps detected (initial: $initial_count, current: $current_count) after ${waited}s" + fi + echo "No coredump writes in progress after ${waited}s" + return 0 + fi + + echo "Waiting for coredump writes... (${waited}s, dd PIDs: $dd_procs, coredumps: $current_count)" + sleep 5 + waited=$((waited + 5)) + done + + echo "Warning: Timed out waiting for coredump writes after ${max_wait}s" +} + +export_logs() { + # Export kind logs and collect coredump binaries + # Usage: export_logs [logs_dir] + # Default logs_dir: /tmp/kind/logs + + local logs_dir="${1:-/tmp/kind/logs}" + + mkdir -p "$logs_dir" + + # Wait for any in-progress coredump writes to complete before exporting + wait_for_coredumps + + kind export logs --name "${KIND_CLUSTER_NAME}" --verbosity 4 "$logs_dir" + collect_coredump_binaries +} + +# Helper function to try extracting a binary from a container +# Used by collect_coredump_binaries() +try_extract_binary() { + local node=$1 + local container_id=$2 + local exe=$3 + local binary_dir=$4 + + # Get container's PID to access its rootfs via /proc//root + local pid + pid=$(${OCI_BIN} exec "$node" crictl inspect "$container_id" 2>/dev/null | jq -r '.info.pid // empty') + if [ -z "$pid" ] || [ "$pid" = "null" ] || [ "$pid" = "0" ]; then + return 1 + fi + + # Common paths where binaries might be located + local binary_paths=("/usr/bin" "/bin" "/usr/sbin" "/sbin" "/usr/libexec/cni" "/usr/lib/frr") + + for path in "${binary_paths[@]}"; do + local full_path="/proc/${pid}/root${path}/${exe}" + if ${OCI_BIN} exec "$node" test -f "$full_path" 2>/dev/null; then + if ${OCI_BIN} exec "$node" cat "$full_path" > "${binary_dir}/${exe}" 2>/dev/null && [ -s "${binary_dir}/${exe}" ]; then + echo " Collected binary: ${exe} from container $container_id (pid $pid)" + return 0 + fi + fi + done + rm -f "${binary_dir}/${exe}" 2>/dev/null + return 1 +} + +collect_coredump_binaries() { + # Collect binaries that caused coredumps for post-mortem debugging + # Parses coredump filenames (core.%P.%e.%h.%s) to identify executables + # Binaries run inside pod containers, so we use crictl to access them + + local binary_dir="${COREDUMP_DIR}/binaries" + + if [ ! -d "$COREDUMP_DIR" ]; then + echo "No coredump directory found, skipping binary collection" + return 0 + fi + + local coredumps + coredumps=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null) + if [ -z "$coredumps" ]; then + echo "No coredumps found, skipping binary collection" + return 0 + fi + + mkdir -p "$binary_dir" + + # Get all KIND nodes + local nodes + nodes=$(kind get nodes --name "${KIND_CLUSTER_NAME}" 2>/dev/null) + if [ -z "$nodes" ]; then + echo "Warning: No KIND nodes available, cannot collect binaries" + return 0 + fi + + # Process each coredump: extract exe name (%e, field 3) + # Filename format: core.%P.%e.%h.%s (see setup_coredumps) + for coredump in $coredumps; do + local filename + filename=$(basename "$coredump") + local exe + exe=$(echo "$filename" | cut -d. 
-f3)
+
+    echo "Processing coredump: $filename (exe=$exe)"
+
+    # Skip if we already collected this binary
+    if [ -f "${binary_dir}/${exe}" ]; then
+      echo "  Binary $exe already collected, skipping"
+      continue
+    fi
+
+    local found=false
+
+    # Search all containers on all nodes for the binary
+    for node in $nodes; do
+      local containers
+      containers=$(${OCI_BIN} exec "$node" crictl ps -q 2>/dev/null) || true
+      for container_id in $containers; do
+        if try_extract_binary "$node" "$container_id" "$exe" "$binary_dir"; then
+          echo "  Collected $exe from container $container_id on node $node"
+          found=true
+          break 2
+        fi
+      done
+    done
+
+    # Fallback: binary running directly on KIND node (not in container)
+    if [ "$found" = false ]; then
+      for node in $nodes; do
+        local bin_path
+        bin_path=$(${OCI_BIN} exec "$node" which "$exe" 2>/dev/null) || true
+        if [ -n "$bin_path" ]; then
+          echo "  Collected $exe from node $node at $bin_path"
+          ${OCI_BIN} cp "${node}:${bin_path}" "${binary_dir}/${exe}" && found=true || true
+          break
+        fi
+      done
+    fi
+
+    if [ "$found" = false ]; then
+      echo "  WARNING: Could not find binary '$exe'"
+    fi
+  done
+
+  echo "Binary collection complete:"
+  ls -la "$binary_dir" 2>/dev/null || true
+}
+
+# Some environments (Fedora 31/32 on desktop) have problems when the cluster
+# is deleted directly with `kind delete cluster --name ovn`; it restarts the host.
+# The root cause is unknown, and this cannot be reproduced on Ubuntu 20.04 or
+# with Fedora 32 Cloud, but it does not happen if we first clean up the ovn-kubernetes resources.
+delete() {
+  OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker}
+
+  if [ "$KIND_INSTALL_METALLB" == true ]; then
+    destroy_metallb
+  fi
+  if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then
+    destroy_bgp
+  fi
+  timeout 5 kubectl --kubeconfig "${KUBECONFIG}" delete namespace ovn-kubernetes || true
+  sleep 5
+  kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}"
+}
+
+create_kind_cluster() {
+  # Output of the jinjanate command
+  KIND_CONFIG_LCL=${DIR}/kind-${KIND_CLUSTER_NAME}.yaml
+
+  ovn_ip_family=${IP_FAMILY} \
+  ovn_ha=${OVN_HA} \
+  net_cidr="${KIND_CIDR}" \
+  svc_cidr=${SVC_CIDR} \
+  use_local_registry=${KIND_LOCAL_REGISTRY} \
+  dns_domain=${KIND_DNS_DOMAIN} \
+  ovn_num_master=${KIND_NUM_MASTER} \
+  ovn_num_worker=${KIND_NUM_WORKER} \
+  kind_num_infra=${KIND_NUM_INFRA} \
+  cluster_log_level=${KIND_CLUSTER_LOGLEVEL:-4} \
+  kind_local_registry_port=${KIND_LOCAL_REGISTRY_PORT} \
+  kind_local_registry_name=${KIND_LOCAL_REGISTRY_NAME} \
+  jinjanate "${KIND_CONFIG}" -o "${KIND_CONFIG_LCL}"
+
+  # Create KIND cluster. For additional debug, add '--verbosity <int>': 0 None .. 
3 Debug + if kind get clusters | grep "${KIND_CLUSTER_NAME}"; then + delete + fi + + if [[ "${KIND_LOCAL_REGISTRY}" == true ]]; then + create_local_registry + fi + + kind create cluster --name "${KIND_CLUSTER_NAME}" --kubeconfig "${KUBECONFIG}" --image "${KIND_IMAGE}":"${K8S_VERSION}" --config=${KIND_CONFIG_LCL} --retain + + cat "${KUBECONFIG}" +} + +remove_no_schedule_taint() { + KIND_NODES=$(kind_get_nodes | sort) + for n in $KIND_NODES; do + # do not error if it fails to remove the taint + kubectl taint node "$n" node-role.kubernetes.io/control-plane:NoSchedule- || true + done +} + +label_ovn_ha() { + MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") + # We want OVN HA not Kubernetes HA + # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= + # to choose the nodes where ovn master components will be placed + for n in $MASTER_NODES; do + kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite done } diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index d8d2ba75cc..285f8e6c0c 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -5,94 +5,21 @@ set -eo pipefail # Returns the full directory name of the script export DIR="$( cd -- "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -export OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} - -# Source the kind-common file from the same directory where this script is located -source "${DIR}/kind-common" +# Source the kind-common.sh file from the same directory where this script is located +source "${DIR}/kind-common.sh" set_default_params() { set_common_default_params - # Set default values - export KIND_CONFIG=${KIND_CONFIG:-} - export KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} - export KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} - export KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} - export KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} - export OVN_HA=${OVN_HA:-false} - export OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} - export OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} - export OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} - export OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} - export KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} - export ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} - export ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} - export ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} - export ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} - export OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} - export KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} - export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} - export OVN_IMAGE=${OVN_IMAGE:-'ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu:helm'} - - # Setup KUBECONFIG patch based on cluster-name - export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} - - # Validated params that work - export MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} - export MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} - - # Input not currently validated. Modify outside script at your own risk. - # These are the same values defaulted to in KIND code (kind/default.go). 
- # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host - # so it needs to use a larger subnet - # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 - export NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} - if [ "$MULTI_POD_SUBNET" == true ]; then - NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" - fi - export NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} - export SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} - export NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} - export SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} - export JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} - export JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} - export TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} - export TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} - export METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} - export METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} - export DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false} - export DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-} - - export KIND_NUM_MASTER=1 - if [ "$OVN_HA" == true ]; then - KIND_NUM_MASTER=3 - fi - - OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} - if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then - echo "Compact mode cannot be used together with Interconnect" - exit 1 + # Hard code ipv4 support until IPv6 is implemented + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then + echo "kind-helm.sh does not support IPv6 yet" + exit 1 fi - - - if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then - KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} - TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) - if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then - echo "(Total k8s nodes / number of nodes per zone) should be zero" - exit 1 - fi - else - KIND_NUM_NODES_PER_ZONE=0 + if [ "$PLATFORM_IPV4_SUPPORT" != true ]; then + echo "kind-helm.sh only supports IPv4, must set PLATFORM_IPV4_SUPPORT to true " + exit 1 fi - - # Hard code ipv4 support until IPv6 is implemented - export PLATFORM_IPV4_SUPPORT=true - - export OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} - export MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} - export ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} - export METRICS_IP=${METRICS_IP:-""} } usage() { @@ -111,14 +38,23 @@ usage() { echo " [ -nse | --network-segmentation-enable ]" echo " [ -nce | --network-connect-enable ]" echo " [ -uae | --preconfigured-udn-addresses-enable ]" + echo " [ -rae | --route-advertisements-enable ]" + echo " [ -evpn | --evpn-enable ]" echo " [-dudn | --dynamic-udn-allocation]" echo " [-dug | --dynamic-udn-removal-grace-period]" + echo " [-adv | --advertise-default-network]" + echo " [-rud | --routed-udn-isolation-disable]" echo " [ -nqe | --network-qos-enable ]" + echo " [ -noe | --no-overlay-enable ]" echo " [ -wk | --num-workers ]" echo " [ -ic | --enable-interconnect]" echo " [ -npz | --node-per-zone ]" + echo " [ -ov | --ovn-image ]" + echo " [ -ovr | --ovn-repo ]" + echo " [ -ovg | --ovn-gitref ]" echo " [ -cn | --cluster-name ]" echo " [ -mip | --metrics-ip ]" + echo " [ -mtu ]" echo " [ --enable-coredumps ]" echo " [ -h ]" echo "" @@ -139,18 +75,28 @@ usage() { echo "-nse | --network-segmentation-enable Enable network segmentation. 
DEFAULT: Disabled"
 echo "-nce | --network-connect-enable Enable network connect (requires network segmentation). DEFAULT: Disabled"
 echo "-uae | --preconfigured-udn-addresses-enable Enable connecting workloads with preconfigured network to user-defined networks. DEFAULT: Disabled"
+ echo "-rae | --route-advertisements-enable Enable route advertisements"
+ echo "-evpn | --evpn-enable Enable EVPN"
 echo "-dudn | --dynamic-udn-allocation Enable dynamic UDN allocation. DEFAULT: Disabled"
 echo "-dug | --dynamic-udn-removal-grace-period Configure the grace period in seconds for dynamic UDN removal. DEFAULT: 120 seconds"
+ echo "-adv | --advertise-default-network Applies a RouteAdvertisements configuration to advertise the default network on all nodes"
+ echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict."
 echo "-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled"
+ echo "-noe | --no-overlay-enable Enable no-overlay mode for the default network. DEFAULT: Disabled"
 echo "-ha | --ha-enabled Enable high availability. DEFAULT: HA Disabled"
 echo "-wk | --num-workers Number of worker nodes. DEFAULT: 2 workers"
+ echo "-ov | --ovn-image Use the specified docker image instead of building locally. DEFAULT: local build."
+ echo "-ovr | --ovn-repo Specify the repository to build OVN from"
+ echo "-ovg | --ovn-gitref Specify the branch, tag, or commit id to build OVN from; it can be a pattern like 'branch-*', in which case matching refs are sorted and the first one is used"
 echo "-cn | --cluster-name Configure the kind cluster's name"
 echo "-mip | --metrics-ip IP address to bind metrics endpoints. DEFAULT: K8S_NODE_IP or 0.0.0.0"
+ echo "-mtu Define the overlay MTU. DEFAULT: 1400 (1500 for no-overlay mode)"
 echo "--enable-coredumps Enable coredump collection on kind nodes. DEFAULT: Disabled"
 echo "-dns | --enable-dnsnameresolver Enable DNSNameResolver for resolving the DNS names used in the DNS rules of EgressFirewall."
 echo "-ce | --enable-central [DEPRECATED] Deploy with OVN Central (Legacy Architecture)"
 echo "-npz | --nodes-per-zone Specify number of nodes per zone (Default 0, which means global zone; >0 means interconnect zone, where 1 for single-node zone, >1 for multi-node zone). If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero."
echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" + echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -195,6 +141,14 @@ parse_args() { ;; -uae | --preconfigured-udn-addresses-enable) ENABLE_PRE_CONF_UDN_ADDR=true ;; + -rae | --route-advertisements-enable) ENABLE_ROUTE_ADVERTISEMENTS=true + ;; + -evpn | --evpn-enable) ENABLE_EVPN=true + ;; + -adv | --advertise-default-network) ADVERTISE_DEFAULT_NETWORK=true + ;; + -rud | --routed-udn-isolation-disable) ADVERTISED_UDN_ISOLATION_MODE=loose + ;; -dudn | --dynamic-udn-allocation) DYNAMIC_UDN_ALLOCATION=true ;; -dug | --dynamic-udn-removal-grace-period) shift @@ -210,6 +164,8 @@ parse_args() { ;; -nqe | --network-qos-enable ) OVN_NETWORK_QOS_ENABLE=true ;; + -noe | --no-overlay-enable ) ENABLE_NO_OVERLAY=true + ;; -ha | --ha-enabled ) OVN_HA=true KIND_NUM_MASTER=3 ;; @@ -221,6 +177,15 @@ parse_args() { fi KIND_NUM_WORKER=$1 ;; + -ov | --ovn-image ) shift + OVN_IMAGE=$1 + ;; + -ovr | --ovn-repo ) shift + OVN_REPO=$1 + ;; + -ovg | --ovn-gitref ) shift + OVN_GITREF=$1 + ;; -cn | --cluster-name ) shift KIND_CLUSTER_NAME=$1 # Setup KUBECONFIG @@ -232,6 +197,8 @@ parse_args() { OVN_ENABLE_INTERCONNECT=false CENTRAL_ARG_PROVIDED=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -ic | --enable-interconnect ) OVN_ENABLE_INTERCONNECT=true IC_ARG_PROVIDED=true ;; @@ -248,6 +215,9 @@ parse_args() { -mip | --metrics-ip ) shift METRICS_IP="$1" ;; + -mtu ) shift + OVN_MTU=$1 + ;; --enable-coredumps ) ENABLE_COREDUMPS=true ;; * ) usage @@ -267,6 +237,7 @@ print_params() { echo "" echo "KIND_CONFIG_FILE = $KIND_CONFIG" echo "KUBECONFIG = $KUBECONFIG" + echo "OCI_BIN = $OCI_BIN" echo "KIND_INSTALL_INGRESS = $KIND_INSTALL_INGRESS" echo "KIND_INSTALL_METALLB = $KIND_INSTALL_METALLB" echo "KIND_INSTALL_PLUGINS = $KIND_INSTALL_PLUGINS" @@ -282,12 +253,21 @@ print_params() { echo "ENABLE_NETWORK_SEGMENTATION = $ENABLE_NETWORK_SEGMENTATION" echo "ENABLE_NETWORK_CONNECT = $ENABLE_NETWORK_CONNECT" echo "ENABLE_PRE_CONF_UDN_ADDR = $ENABLE_PRE_CONF_UDN_ADDR" + echo "ENABLE_ROUTE_ADVERTISEMENTS = $ENABLE_ROUTE_ADVERTISEMENTS" + echo "ENABLE_EVPN = $ENABLE_EVPN" + echo "ADVERTISE_DEFAULT_NETWORK = $ADVERTISE_DEFAULT_NETWORK" + echo "ADVERTISED_UDN_ISOLATION_MODE = $ADVERTISED_UDN_ISOLATION_MODE" echo "OVN_NETWORK_QOS_ENABLE = $OVN_NETWORK_QOS_ENABLE" + echo "ENABLE_NO_OVERLAY = $ENABLE_NO_OVERLAY" + echo "OVN_MTU = $OVN_MTU" echo "OVN_IMAGE = $OVN_IMAGE" + echo "OVN_REPO = $OVN_REPO" + echo "OVN_GITREF = $OVN_GITREF" echo "KIND_NUM_MASTER = $KIND_NUM_MASTER" echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "OVN_ENABLE_INTERCONNECT = $OVN_ENABLE_INTERCONNECT" echo "DYNAMIC_UDN_ALLOCATION = $DYNAMIC_UDN_ALLOCATION" echo "DYNAMIC_UDN_GRACE_PERIOD = $DYNAMIC_UDN_GRACE_PERIOD" @@ -302,23 +282,11 @@ print_params() { } check_dependencies() { - if ! command_exists kubectl ; then - echo "'kubectl' not found, installing" - setup_kubectl_bin - fi - - for cmd in "$OCI_BIN" kind helm go ; do \ - if ! command_exists "$cmd" ; then - echo "Dependency not met: $cmd" - exit 1 - fi - done - - # check for currently unsupported features - if [ "${PLATFORM_IPV6_SUPPORT:-}" = "true" ]; then - echo "Fatal: PLATFORM_IPV6_SUPPORT support not implemented yet" - exit 1 - fi + check_common_dependencies + if ! 
command_exists helm ; then + echo "'helm' not found, exiting" + exit 1 + fi } helm_prereqs() { @@ -328,103 +296,6 @@ helm_prereqs() { sudo sysctl fs.inotify.max_user_instances=512 } -build_ovn_image() { - if [ "${SKIP_OVN_IMAGE_REBUILD}" == "true" ]; then - echo "Explicitly instructed not to rebuild ovn image: ${OVN_IMAGE}" - return - fi - - # Build ovn kube image - pushd ${DIR}/../dist/images - make fedora-image - popd -} - -get_image() { - local image_and_tag="${1:-$OVN_IMAGE}" # Use $1 if provided, otherwise use $OVN_IMAGE - local image="${image_and_tag%%:*}" # Extract everything before the first colon - echo "$image" -} - -get_tag() { - local image_and_tag="${1:-$OVN_IMAGE}" # Use $1 if provided, otherwise use $OVN_IMAGE - local tag="${image_and_tag##*:}" # Extract everything after the last colon - echo "$tag" -} - -create_kind_cluster() { - [ -n "${KIND_CONFIG}" ] || { - KIND_CONFIG='/tmp/kind.yaml' - - # Start of the kind configuration - cat < /tmp/kind.yaml -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: -- role: control-plane - kubeadmConfigPatches: - - | - kind: InitConfiguration - nodeRegistration: - kubeletExtraArgs: - node-labels: "ingress-ready=true" - authorization-mode: "AlwaysAllow" -EOT - } - - # Add control-plane nodes based on OVN_HA status. If there are 2 or more worker nodes, use - # 2 of them them to host databases instead of creating additional control plane nodes. - if [ "$OVN_HA" == true ] && [ "$KIND_NUM_WORKER" -lt 2 ]; then - for i in {2..3}; do # Have 3 control-plane nodes for HA - echo "- role: control-plane" >> /tmp/kind.yaml - done - fi - - # Add worker nodes based on KIND_NUM_WORKER - for i in $(seq 1 $KIND_NUM_WORKER); do - echo "- role: worker" >> /tmp/kind.yaml - done - # kind only allows single subnet for pod network, while ovn-kubernetes supports multiple subnets. - # So we pick the first subnet from the provided list for kind configuration and store it in KIND_CIDR. - # remove host subnet mask info for kind configuration (when the subnet is set as 10.0.0.0/16/14) - KIND_CIDR_IPV4=$(echo "${NET_CIDR_IPV4}"| cut -d',' -f1 | cut -d'/' -f1,2 ) - - # Add networking configuration - cat <> /tmp/kind.yaml -networking: - disableDefaultCNI: true - kubeProxyMode: none - podSubnet: $KIND_CIDR_IPV4 - serviceSubnet: $SVC_CIDR_IPV4 -EOT - - kind delete clusters $KIND_CLUSTER_NAME ||: - kind create cluster --name $KIND_CLUSTER_NAME --image "${KIND_IMAGE}":"${K8S_VERSION}" --config "${KIND_CONFIG}" --retain - kind load docker-image --name $KIND_CLUSTER_NAME $OVN_IMAGE - - # When using HA, label nodes to host db. 
- if [ "$OVN_HA" == true ]; then - kubectl label nodes k8s.ovn.org/ovnkube-db=true --overwrite \ - -l node-role.kubernetes.io/control-plane - if [ "$KIND_NUM_WORKER" -ge 2 ]; then - for n in ovn-worker ovn-worker2; do - # We want OVN HA not Kubernetes HA - # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= - # to choose the nodes where ovn master components will be placed - kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite - done - fi - fi - - # Remove taint, so control-plane nodes can also schedule regular pods - if [ "$KIND_REMOVE_TAINT" == true ]; then - kubectl taint node "$n" node-role.kubernetes.io/master:NoSchedule- \ - -l node-role.kubernetes.io/control-plane ||: - kubectl taint node "$n" node-role.kubernetes.io/control-plane:NoSchedule- \ - -l node-role.kubernetes.io/control-plane ||: - fi -} - label_ovn_single_node_zones() { KIND_NODES=$(kind_get_nodes) for n in $KIND_NODES; do @@ -455,7 +326,6 @@ label_ovn_multiple_nodes_zones() { create_ovn_kubernetes() { cd ${DIR}/../helm/ovn-kubernetes - MASTER_REPLICAS=$(kubectl get node -l node-role.kubernetes.io/control-plane --no-headers | wc -l) if [[ $KIND_NUM_NODES_PER_ZONE == 1 ]]; then label_ovn_single_node_zones value_file="values-single-node-zone.yaml" @@ -465,10 +335,12 @@ create_ovn_kubernetes() { value_file="values-multi-node-zone.yaml" ovnkube_db_options="" else + label_ovn_ha value_file="values-no-ic.yaml" ovnkube_db_options="--set tags.ovnkube-db-raft=$(if [ "${OVN_HA}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set tags.ovnkube-db=$(if [ "${OVN_HA}" == "false" ]; then echo "true"; else echo "false"; fi)" fi + MASTER_REPLICAS=$(kubectl get node -l node-role.kubernetes.io/control-plane --no-headers | wc -l) echo "value_file=${value_file}" # For multi-pod-subnet case, NET_CIDR_IPV4 is a list of CIDRs separated by comma. # When Helm encounters a comma within a string value in a --set argument, it attempts to parse the comma as a separator @@ -480,9 +352,10 @@ helm install ovn-kubernetes . -f "${value_file}" \ --set k8sAPIServer=${API_URL} \ --set podNetwork="${ESCAPED_NET_CIDR_IPV4}" \ --set serviceNetwork=${SVC_CIDR_IPV4} \ + --set mtu=${OVN_MTU} \ --set ovnkube-master.replicas=${MASTER_REPLICAS} \ - --set global.image.repository=$(get_image) \ - --set global.image.tag=$(get_tag) \ + --set global.image.repository=${OVN_IMAGE%%:*} \ + --set global.image.tag=${OVN_IMAGE##*:} \ --set global.enableAdminNetworkPolicy=true \ --set global.enableMulticast=$(if [ "${OVN_MULTICAST_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableMultiNetwork=$(if [ "${ENABLE_MULTI_NET}" == "true" ]; then echo "true"; else echo "false"; fi) \ @@ -491,12 +364,18 @@ helm install ovn-kubernetes . 
-f "${value_file}" \ --set global.enableDynamicUDNAllocation=$(if [ "${DYNAMIC_UDN_ALLOCATION}" == "true" ]; then echo "true"; else echo "false"; fi) \ $( [ -n "$DYNAMIC_UDN_GRACE_PERIOD" ] && echo "--set global.dynamicUDNGracePeriod=$DYNAMIC_UDN_GRACE_PERIOD" ) \ --set global.enablePreconfiguredUDNAddresses=$(if [ "${ENABLE_PRE_CONF_UDN_ADDR}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableRouteAdvertisements=$(if [ "${ENABLE_ROUTE_ADVERTISEMENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableEVPN=$(if [ "${ENABLE_EVPN}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.advertiseDefaultNetwork=$(if [ "${ADVERTISE_DEFAULT_NETWORK}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.advertisedUDNIsolationMode="${ADVERTISED_UDN_ISOLATION_MODE}" \ --set global.enableHybridOverlay=$(if [ "${OVN_HYBRID_OVERLAY_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableObservability=$(if [ "${OVN_OBSERV_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.emptyLbEvents=$(if [ "${OVN_EMPTY_LB_EVENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableDNSNameResolver=$(if [ "${OVN_ENABLE_DNSNAMERESOLVER}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableNoOverlay=$(if [ "${ENABLE_NO_OVERLAY}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableCoredumps=$(if [ "${ENABLE_COREDUMPS}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.allowICMPNetworkPolicy=$(if [ "${OVN_ALLOW_ICMP_NETPOL}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF ) @@ -504,14 +383,6 @@ EOF eval "${cmd}" } -delete() { - if [ "$KIND_INSTALL_METALLB" == true ]; then - destroy_metallb - fi - helm uninstall ovn-kubernetes && sleep 5 ||: - kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}" -} - install_online_ovn_kubernetes_crds() { # NOTE: When you update vendoring versions for the ANP & BANP APIs, we must update the version of the CRD we pull from in the below URL run_kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/network-policy-api/v0.1.5/config/crd/experimental/policy.networking.k8s.io_adminnetworkpolicies.yaml @@ -529,6 +400,7 @@ if [ "$ENABLE_COREDUMPS" == true ]; then setup_coredumps fi detect_apiserver_url +install_ovn_image docker_disable_ipv6 coredns_patch if [ "$OVN_ENABLE_DNSNAMERESOLVER" == true ]; then @@ -539,6 +411,13 @@ if [ "$OVN_ENABLE_DNSNAMERESOLVER" == true ]; then add_ocp_dnsnameresolver_to_coredns_config update_coredns_deployment_image fi +if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then + deploy_frr_external_container + deploy_bgp_external_server +fi +if [ "$KIND_REMOVE_TAINT" == true ]; then + remove_no_schedule_taint +fi create_ovn_kubernetes install_online_ovn_kubernetes_crds @@ -574,4 +453,8 @@ if [ "$KIND_INSTALL_KUBEVIRT" == true ]; then install_kubevirt fi +if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then + install_frr_k8s +fi + interconnect_arg_check diff --git a/contrib/kind.sh b/contrib/kind.sh index 2d489b9f01..a68b922d95 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -3,26 +3,8 @@ # Returns the full directory name of the script DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -# Source the kind-common file from the same directory where 
this script is located -source "${DIR}/kind-common" - -# Some environments (Fedora32,31 on desktop), have problems when the cluster -# is deleted directly with kind `kind delete cluster --name ovn`, it restarts the host. -# The root cause is unknown, this also can not be reproduced in Ubuntu 20.04 or -# with Fedora32 Cloud, but it does not happen if we clean first the ovn-kubernetes resources. -delete() { - OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} - - if [ "$KIND_INSTALL_METALLB" == true ]; then - destroy_metallb - fi - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then - destroy_bgp - fi - timeout 5 kubectl --kubeconfig "${KUBECONFIG}" delete namespace ovn-kubernetes || true - sleep 5 - kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}" -} +# Source the kind-common.sh file from the same directory where this script is located +source "${DIR}/kind-common.sh" usage() { echo "usage: kind.sh [[[-cf |--config-file ] [-kt|--keep-taint] [-ha|--ha-enabled]" @@ -60,6 +42,7 @@ usage() { echo " [-dug | --dynamic-udn-removal-grace-period ]" echo " [-adv | --advertise-default-network]" echo " [-nqe | --network-qos-enable]" + echo " [-noe | --no-overlay-enable]" echo " [--isolated]" echo " [--enable-coredumps]" echo " [-dns | --enable-dnsnameresolver]" @@ -142,6 +125,8 @@ echo "-dug | --dynamic-udn-removal-grace-period Configure the grac echo "-adv | --advertise-default-network Applies a RouteAdvertisements configuration to advertise the default network on all nodes" echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict." echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" +echo "-noe | --no-overlay-enable Enable no overlay" +echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -372,6 +357,8 @@ parse_args() { -ic | --enable-interconnect ) OVN_ENABLE_INTERCONNECT=true IC_ARG_PROVIDED=true ;; + -noe | --no-overlay-enable) ENABLE_NO_OVERLAY=true + ;; --disable-ovnkube-identity) OVN_ENABLE_OVNKUBE_IDENTITY=false ;; -mtu ) shift @@ -391,6 +378,8 @@ parse_args() { ;; -mps| --multi-pod-subnet ) MULTI_POD_SUBNET=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -h | --help ) usage exit ;; @@ -417,6 +406,7 @@ print_params() { echo "KIND_INSTALL_PLUGINS = $KIND_INSTALL_PLUGINS" echo "KIND_INSTALL_KUBEVIRT = $KIND_INSTALL_KUBEVIRT" echo "KIND_OPT_OUT_KUBEVIRT_IPAM = $KIND_OPT_OUT_KUBEVIRT_IPAM" + echo "OCI_BIN = $OCI_BIN" echo "OVN_HA = $OVN_HA" echo "RUN_IN_CONTAINER = $RUN_IN_CONTAINER" echo "KIND_CLUSTER_NAME = $KIND_CLUSTER_NAME" @@ -477,6 +467,7 @@ print_params() { echo "ENABLE_PRE_CONF_UDN_ADDR = $ENABLE_PRE_CONF_UDN_ADDR" echo "DYNAMIC_UDN_ALLOCATION = $DYNAMIC_UDN_ALLOCATION" echo "DYNAMIC_UDN_GRACE_PERIOD = $DYNAMIC_UDN_GRACE_PERIOD" + echo "ENABLE_NO_OVERLAY = $ENABLE_NO_OVERLAY" echo "OVN_ENABLE_INTERCONNECT = $OVN_ENABLE_INTERCONNECT" if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then echo "KIND_NUM_NODES_PER_ZONE = $KIND_NUM_NODES_PER_ZONE" @@ -493,70 +484,10 @@ print_params() { echo "OVN_MTU= $OVN_MTU" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "" } -install_jinjanator_renderer() { - # ensure jinjanator renderer installed - pipx install jinjanator[yaml] - pipx ensurepath --force >/dev/null - export PATH=~/.local/bin:$PATH -} - -check_dependencies() { - if ! 
command_exists curl ; then - echo "Dependency not met: Command not found 'curl'" - exit 1 - fi - - if ! command_exists kubectl ; then - echo "'kubectl' not found, installing" - setup_kubectl_bin - fi - - if ! command_exists kind ; then - echo "Dependency not met: Command not found 'kind'" - exit 1 - fi - - local kind_min="0.27.0" - local kind_cur - kind_cur=$(kind version -q) - if [ "$(echo -e "$kind_min\n$kind_cur" | sort -V | head -1)" != "$kind_min" ]; then - echo "Dependency not met: expected kind version >= $kind_min but have $kind_cur" - exit 1 - fi - - if ! command_exists jq ; then - echo "Dependency not met: Command not found 'jq'" - exit 1 - fi - - if ! command_exists awk ; then - echo "Dependency not met: Command not found 'awk'" - exit 1 - fi - - if ! command_exists jinjanate ; then - if ! command_exists pipx ; then - echo "Dependency not met: 'jinjanator' not installed and cannot install with 'pipx'" - exit 1 - fi - echo "'jinjanate' not found, installing with 'pipx'" - install_jinjanator_renderer - fi - - if ! command_exists docker && ! command_exists podman; then - echo "Dependency not met: Neither docker nor podman found" - exit 1 - fi - - if command_exists podman && ! command_exists skopeo; then - echo "Dependency not met: skopeo not installed. Run the following command to install it: 'sudo dnf install skopeo'" - exit 1 - fi -} - OPENSSL="" set_openssl_binary() { for s in openssl openssl3; do @@ -580,47 +511,24 @@ set_default_params() { # Set default values # Used for multi cluster setups - KIND_CREATE=${KIND_CREATE:-true} KIND_ADD_NODES=${KIND_ADD_NODES:-false} - KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} - # Setup KUBECONFIG patch based on cluster-name - export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} - # Scrub any existing kubeconfigs at the path - if [ "${KIND_CREATE}" == true ]; then - rm -f ${KUBECONFIG} - fi MANIFEST_OUTPUT_DIR=${MANIFEST_OUTPUT_DIR:-${DIR}/../dist/yaml} if [ ${KIND_CLUSTER_NAME} != "ovn" ]; then MANIFEST_OUTPUT_DIR="${DIR}/../dist/yaml/${KIND_CLUSTER_NAME}" fi RUN_IN_CONTAINER=${RUN_IN_CONTAINER:-false} OVN_GATEWAY_MODE=${OVN_GATEWAY_MODE:-shared} - KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} - KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} - KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} - KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} KIND_OPT_OUT_KUBEVIRT_IPAM=${KIND_OPT_OUT_KUBEVIRT_IPAM:-false} - OVN_HA=${OVN_HA:-false} - KIND_LOCAL_REGISTRY=${KIND_LOCAL_REGISTRY:-false} KIND_LOCAL_REGISTRY_NAME=${KIND_LOCAL_REGISTRY_NAME:-kind-registry} KIND_LOCAL_REGISTRY_PORT=${KIND_LOCAL_REGISTRY_PORT:-5000} KIND_DNS_DOMAIN=${KIND_DNS_DOMAIN:-"cluster.local"} - KIND_CONFIG=${KIND_CONFIG:-${DIR}/kind.yaml.j2} - KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} - PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} - PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} ENABLE_IPSEC=${ENABLE_IPSEC:-false} - OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} OVN_DISABLE_SNAT_MULTIPLE_GWS=${OVN_DISABLE_SNAT_MULTIPLE_GWS:-false} OVN_DISABLE_FORWARDING=${OVN_DISABLE_FORWARDING:=false} OVN_ENCAP_PORT=${OVN_ENCAP_PORT:-""} OVN_DISABLE_PKT_MTU_CHECK=${OVN_DISABLE_PKT_MTU_CHECK:-false} - OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} - OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} KIND_ALLOW_SYSTEM_WRITES=${KIND_ALLOW_SYSTEM_WRITES:-false} - OVN_IMAGE=${OVN_IMAGE:-local} - OVN_REPO=${OVN_REPO:-""} - OVN_GITREF=${OVN_GITREF:-""} + MASTER_LOG_LEVEL=${MASTER_LOG_LEVEL:-5} NODE_LOG_LEVEL=${NODE_LOG_LEVEL:-5} 
DBCHECKER_LOG_LEVEL=${DBCHECKER_LOG_LEVEL:-5} @@ -635,67 +543,14 @@ set_default_params() { if [ "$OVN_ENABLE_EX_GW_NETWORK_BRIDGE" == true ]; then OVN_EX_GW_NETWORK_INTERFACE="eth1" fi - MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} - # Input not currently validated. Modify outside script at your own risk. - # These are the same values defaulted to in KIND code (kind/default.go). - # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host - # so it needs to use a larger subnet - # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 - NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} - NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} - if [ "$MULTI_POD_SUBNET" == true ]; then - NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" - NET_CIDR_IPV6="fd00:10:243::/63/64,fd00:10:244::/48" - fi - NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} - SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} - SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} - JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} - JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} - MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} - MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} - TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} - TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} - METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} - METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} - BGP_SERVER_NET_SUBNET_IPV4=${BGP_SERVER_NET_SUBNET_IPV4:-172.26.0.0/16} - BGP_SERVER_NET_SUBNET_IPV6=${BGP_SERVER_NET_SUBNET_IPV6:-fc00:f853:ccd:e796::/64} - KIND_NUM_MASTER=1 - OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} OVN_ENABLE_OVNKUBE_IDENTITY=${OVN_ENABLE_OVNKUBE_IDENTITY:-true} - OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} - - - if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then - echo "Compact mode cannot be used together with Interconnect" - exit 1 - fi - - if [ "$OVN_HA" == true ]; then - KIND_NUM_MASTER=3 - KIND_NUM_WORKER=${KIND_NUM_WORKER:-0} - else - KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} - fi KIND_NUM_INFRA=${KIND_NUM_INFRA:-0} KIND_INSTALL_PROMETHEUS=${KIND_INSTALL_PROMETHEUS:-false} - if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then - KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} - - TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) - if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then - echo "(Total k8s nodes / number of nodes per zone) should be zero" - exit 1 - fi - fi - OVN_HOST_NETWORK_NAMESPACE=${OVN_HOST_NETWORK_NAMESPACE:-ovn-host-network} OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} - METRICS_IP=${METRICS_IP:-""} - OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-identity ovnkube-zone-controller ovnkube-control-plane ovnkube-master ovnkube-node"} OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} @@ -707,61 +562,6 @@ set_default_params() { if [ "$OVN_DUMMY_GATEWAY_BRIDGE" == true ]; then OVN_GATEWAY_OPTS="--allow-no-uplink --gateway-interface=br-ex" fi - ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} - ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} - if [ "$ENABLE_NETWORK_SEGMENTATION" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then - echo "Network segmentation (UDN) requires multi-network to be enabled (-mne)" - exit 1 - fi - - 
ENABLE_ROUTE_ADVERTISEMENTS=${ENABLE_ROUTE_ADVERTISEMENTS:-false} - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then - echo "Route advertisements requires multi-network to be enabled (-mne)" - exit 1 - fi - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != true ]; then - echo "Route advertisements requires interconnect to be enabled (-ic)" - exit 1 - fi - - ENABLE_EVPN=${ENABLE_EVPN:-false} - if [ "$ENABLE_EVPN" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then - echo "EVPN requires Route advertisements to be enabled (-rae)" - exit 1 - fi - - ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} - if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Preconfigured UDN addresses requires network-segmentation to be enabled (-nse)" - exit 1 - fi - if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $OVN_ENABLE_INTERCONNECT != true ]]; then - echo "Preconfigured UDN addresses requires interconnect to be enabled (-ic)" - exit 1 - fi - - ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} - if [[ $ENABLE_NETWORK_CONNECT == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Network connect requires network-segmentation to be enabled (-nse)" - exit 1 - fi - - DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false} - if [[ $DYNAMIC_UDN_ALLOCATION == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Dynamic UDN allocation requires network-segmentation to be enabled (-nse)" - exit 1 - fi - DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-120s} - ADVERTISED_UDN_ISOLATION_MODE=${ADVERTISED_UDN_ISOLATION_MODE:-strict} - ADVERTISE_DEFAULT_NETWORK=${ADVERTISE_DEFAULT_NETWORK:-false} - OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} - if [ "$OVN_COMPACT_MODE" == true ]; then - KIND_NUM_WORKER=0 - fi - OVN_MTU=${OVN_MTU:-1400} - OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} - OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} - ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} } check_ipv6() { @@ -885,74 +685,6 @@ scale_kind_cluster() { fi } -create_kind_cluster() { - # Output of the jinjanate command - KIND_CONFIG_LCL=${DIR}/kind-${KIND_CLUSTER_NAME}.yaml - - ovn_ip_family=${IP_FAMILY} \ - ovn_ha=${OVN_HA} \ - net_cidr="${KIND_CIDR}" \ - svc_cidr=${SVC_CIDR} \ - use_local_registy=${KIND_LOCAL_REGISTRY} \ - dns_domain=${KIND_DNS_DOMAIN} \ - ovn_num_master=${KIND_NUM_MASTER} \ - ovn_num_worker=${KIND_NUM_WORKER} \ - kind_num_infra=${KIND_NUM_INFRA} \ - cluster_log_level=${KIND_CLUSTER_LOGLEVEL:-4} \ - kind_local_registry_port=${KIND_LOCAL_REGISTRY_PORT} \ - kind_local_registry_name=${KIND_LOCAL_REGISTRY_NAME} \ - jinjanate "${KIND_CONFIG}" -o "${KIND_CONFIG_LCL}" - - # Create KIND cluster. For additional debug, add '--verbosity ': 0 None .. 
3 Debug - if kind get clusters | grep "${KIND_CLUSTER_NAME}"; then - delete - fi - - if [[ "${KIND_LOCAL_REGISTRY}" == true ]]; then - create_local_registry - fi - - kind create cluster --name "${KIND_CLUSTER_NAME}" --kubeconfig "${KUBECONFIG}" --image "${KIND_IMAGE}":"${K8S_VERSION}" --config=${KIND_CONFIG_LCL} --retain - - cat "${KUBECONFIG}" -} - -set_ovn_image() { - # if we're using the local registry and still need to build, push to local registry - if [ "$KIND_LOCAL_REGISTRY" == true ];then - OVN_IMAGE="localhost:5000/ovn-daemonset-fedora:latest" - else - OVN_IMAGE="localhost/ovn-daemonset-fedora:dev" - fi -} - -build_ovn_image() { - local push_args="" - if [ "$OCI_BIN" == "podman" ]; then - # docker doesn't perform tls check by default only podman does, hence we need to disable it for podman. - push_args="--tls-verify=false" - fi - - if [ "$OVN_IMAGE" == local ]; then - set_ovn_image - - # Build image - make -C ${DIR}/../dist/images IMAGE="${OVN_IMAGE}" OVN_REPO="${OVN_REPO}" OVN_GITREF="${OVN_GITREF}" OCI_BIN="${OCI_BIN}" fedora-image - - # store in local registry - if [ "$KIND_LOCAL_REGISTRY" == true ];then - echo "Pushing built image to local $OCI_BIN registry" - $OCI_BIN push $push_args "$OVN_IMAGE" - fi - # We should push to local registry if image is not remote - elif [ "${OVN_IMAGE}" != "" -a "${KIND_LOCAL_REGISTRY}" == true ] && (echo "$OVN_IMAGE" | grep / -vq); then - local local_registry_ovn_image="localhost:5000/${OVN_IMAGE}" - $OCI_BIN tag "$OVN_IMAGE" $local_registry_ovn_image - OVN_IMAGE=$local_registry_ovn_image - $OCI_BIN push $push_args "$OVN_IMAGE" - fi -} - create_ovn_kube_manifests() { local ovnkube_image=${OVN_IMAGE} if [ "$KIND_LOCAL_REGISTRY" == true ];then @@ -1022,6 +754,7 @@ create_ovn_kube_manifests() { --evpn-enable="${ENABLE_EVPN}" \ --advertise-default-network="${ADVERTISE_DEFAULT_NETWORK}" \ --advertised-udn-isolation-mode="${ADVERTISED_UDN_ISOLATION_MODE}" \ + --no-overlay-enable="${ENABLE_NO_OVERLAY}" \ --ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}" \ --metrics-ip="${METRICS_IP}" \ --compact-mode="${OVN_COMPACT_MODE}" \ @@ -1032,15 +765,11 @@ create_ovn_kube_manifests() { --network-qos-enable="${OVN_NETWORK_QOS_ENABLE}" \ --mtu="${OVN_MTU}" \ --enable-dnsnameresolver="${OVN_ENABLE_DNSNAMERESOLVER}" \ - --mtu="${OVN_MTU}" \ - --enable-observ="${OVN_OBSERV_ENABLE}" + --enable-observ="${OVN_OBSERV_ENABLE}" \ + --allow-icmp-netpol="${OVN_ALLOW_ICMP_NETPOL}" popd } -install_ovn_image() { - install_image ${OVN_IMAGE} -} - install_ovn_global_zone() { if [ "$OVN_HA" == true ]; then run_kubectl apply -f ovnkube-db-raft.yaml @@ -1131,20 +860,12 @@ install_ovn() { run_kubectl apply -f rbac-ovnkube-master.yaml run_kubectl apply -f rbac-ovnkube-node.yaml run_kubectl apply -f rbac-ovnkube-db.yaml - MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") - # We want OVN HA not Kubernetes HA - # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= - # to choose the nodes where ovn master components will be placed - for n in $MASTER_NODES; do - kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite - if [ "$KIND_REMOVE_TAINT" == true ]; then - # do not error if it fails to remove the taint - # remove both master and control-plane taints until master is removed from 1.25 - # // https://github.com/kubernetes/kubernetes/pull/107533 - kubectl taint node "$n" node-role.kubernetes.io/master:NoSchedule- || true - kubectl taint node "$n" 
node-role.kubernetes.io/control-plane:NoSchedule- || true
-    fi
-  done
+  if [ "${OVN_HA}" == "true" ]; then
+    label_ovn_ha
+  fi
+  if [ "$KIND_REMOVE_TAINT" == true ]; then
+    remove_no_schedule_taint
+  fi
 
   run_kubectl apply -f ovs-node.yaml
@@ -1279,7 +1000,7 @@ add_dns_hostnames() {
   done
 }
 
-check_dependencies
+check_common_dependencies
 
 # In order to allow providing arguments with spaces, e.g. "-vconsole:info -vfile:info"
 # the original command was replaced by
 parse_args "$@"
@@ -1372,7 +1093,7 @@ if [ "$KIND_INSTALL_KUBEVIRT" == true ]; then
   fi
 fi
 if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then
-  install_ffr_k8s
+  install_frr_k8s
 fi
 
 interconnect_arg_check
diff --git a/contrib/kind.yaml.j2 b/contrib/kind.yaml.j2
index 0f987d30bc..ee6d719ea9 100644
--- a/contrib/kind.yaml.j2
+++ b/contrib/kind.yaml.j2
@@ -14,7 +14,7 @@ networking:
 {%- if ovn_ip_family %}
   ipFamily: {{ ovn_ip_family }}
 {%- endif %}
-{%- if use_local_registy == "true"%}
+{%- if use_local_registry == "true"%}
 containerdConfigPatches:
 - |-
   [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:{{ kind_local_registry_port }}"]
diff --git a/contrib/perf/generate_perf_report.py b/contrib/perf/generate_perf_report.py
index 7139033289..0e80018f65 100644
--- a/contrib/perf/generate_perf_report.py
+++ b/contrib/perf/generate_perf_report.py
@@ -18,9 +18,10 @@ class MetricsProcessor:
     """Process and analyze metrics data from JSON files."""
 
-    def __init__(self, metrics_dir: str = "."):
+    def __init__(self, metrics_dir: str = ".", workload: str = "kubelet-density-cni"):
+        self.workload = workload
         self.metrics_dir = metrics_dir
-        self.pod_latency_file = "podLatencyMeasurement-kubelet-density-cni.json"
+        self.pod_latency_file = f"podLatencyMeasurement-{self.workload}.json"
         self.container_cpu_file = "containerCPU.json"
         self.container_memory_file = "containerMemory.json"
@@ -139,11 +140,12 @@ def get_container_type_from_container(self, container_name: str, pod_name: str)
 class ReportGenerator:
     """Generate text report from processed metrics data."""
 
-    def __init__(self, title: str = "Kubernetes Workload Metrics Report"):
+    def __init__(self, title: str = "Kubernetes Workload Metrics Report", workload: str = "kubelet-density-cni"):
         self.title = title
+        self.workload = workload
 
     def generate_report(self, pod_latency: Dict[str, Any], ovn_cpu: Dict[str, Any],
                         ovn_memory: Dict[str, Any]) -> str:
         """Generate complete text report."""
         stats = pod_latency['stats']
@@ -151,7 +153,7 @@ def generate_report(self, pod_latency: Dict[str, Any], ovn_cpu: Dict[str, Any],
         # Header
         report_lines.append("# 📊 Kubernetes Workload Metrics Report")
-        report_lines.append("## kubelet-density-cni Performance Results")
+        report_lines.append(f"## {self.workload} Performance Results")
         report_lines.append("")
         report_lines.append(f"**Generated on:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}")
         report_lines.append("")
@@ -291,6 +293,8 @@ def detect_pr_environment() -> Optional[str]:
 def main():
     """Main function to generate the performance report."""
     parser = argparse.ArgumentParser(description='Generate Kubernetes workload metrics report')
+    parser.add_argument('--workload', default='kubelet-density-cni',
+                        help='Workload name (default: kubelet-density-cni)')
     parser.add_argument('--metrics-dir', default='.',
                         help='Directory containing JSON metrics files (default: current directory)')
     parser.add_argument('--output', default='performance_report.md',
@@ -310,8 +314,8 @@ def main():
     print()
 
     # Initialize processor and generator
-    processor = MetricsProcessor(args.metrics_dir)
-    generator = ReportGenerator(args.title)
+    processor = MetricsProcessor(args.metrics_dir, args.workload)
+    generator = ReportGenerator(args.title, args.workload)
 
     # Load and process data
     print("📊 Loading and processing metrics data...")
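With the workload name parameterized end to end, the same report script now serves every workload in this directory. A typical invocation might look like the following (the metrics directory is an assumed example path; it only needs to contain the podLatencyMeasurement-<workload>.json file that kube-burner wrote):

    python3 generate_perf_report.py \
        --workload cudn-density-l2-nopods \
        --metrics-dir ./collected-metrics \
        --output performance_report.md

With --workload set, the processor loads podLatencyMeasurement-cudn-density-l2-nopods.json and the report header names that workload instead of the previously hard-coded kubelet-density-cni.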
diff --git a/contrib/perf/workloads/cudn-density-l2-noPods.yml b/contrib/perf/workloads/cudn-density-l2-noPods.yml
new file mode 100644
index 0000000000..401199555a
--- /dev/null
+++ b/contrib/perf/workloads/cudn-density-l2-noPods.yml
@@ -0,0 +1,46 @@
+---
+global:
+  measurements:
+    - name: podLatency
+    - name: pprof
+      pprofInterval: 1m
+      pprofDirectory: pprof-data
+      pprofTargets:
+        - name: ovnkube-controller
+          namespace: "ovn-kubernetes"
+          labelSelector: {app: ovnkube-node}
+          url: http://localhost:9410/debug/pprof/profile?seconds=30
+        - name: ovnkube-control-plane
+          namespace: "ovn-kubernetes"
+          labelSelector: {name: ovnkube-control-plane}
+          url: http://localhost:9411/debug/pprof/profile?seconds=30
+        - name: ovnkube-controller-heap
+          namespace: "ovn-kubernetes"
+          labelSelector: {app: ovnkube-node}
+          url: http://localhost:9410/debug/pprof/heap?seconds=30
+        - name: ovnkube-control-plane-heap
+          namespace: "ovn-kubernetes"
+          labelSelector: {name: ovnkube-control-plane}
+          url: http://localhost:9411/debug/pprof/heap?seconds=30
+jobs:
+  - name: cudn-density-l2-nopods
+    jobIterations: 200
+    qps: 10
+    burst: 10
+    namespacedIterations: true
+    namespace: cudn-density-l2-nopods
+    waitWhenFinished: true
+    podWait: false
+    preLoadImages: false
+    preLoadPeriod: 2m
+    # Disabling churn until https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5883 is resolved
+    #churnConfig:
+    #  percent: 10
+    #  cycles: 10
+    #  mode: objects
+    objects:
+      - objectTemplate: workloads/templates/udn-density/cudn_l2.yml
+        replicas: 1
+      - objectTemplate: workloads/templates/udn-density/cudn_ns.yml
+        replicas: 1
+
diff --git a/contrib/perf/workloads/kubelet-density-cni.yml b/contrib/perf/workloads/kubelet-density-cni.yml
index 7d97f60579..dd3f193343 100644
--- a/contrib/perf/workloads/kubelet-density-cni.yml
+++ b/contrib/perf/workloads/kubelet-density-cni.yml
@@ -39,11 +39,11 @@ jobs:
     mode: objects
 
   objects:
-  - objectTemplate: templates/webserver-deployment.yml
+  - objectTemplate: workloads/templates/kubelet-density-cni/webserver-deployment.yml
     replicas: 1
 
-  - objectTemplate: templates/webserver-service.yml
+  - objectTemplate: workloads/templates/kubelet-density-cni/webserver-service.yml
     replicas: 1
 
-  - objectTemplate: templates/curl-deployment.yml
+  - objectTemplate: workloads/templates/kubelet-density-cni/curl-deployment.yml
     replicas: 1
diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml b/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml
new file mode 100644
index 0000000000..d437769475
--- /dev/null
+++ b/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml
@@ -0,0 +1,40 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: curl-{{.Replica}}-{{.Iteration}}
+spec:
+  template:
+    metadata:
+      labels:
+        name: curl-{{.Replica}}-{{.Iteration}}
+    spec:
+      nodeSelector:
+        node-role.kubernetes.io/worker: ""
+      containers:
+      - name: curlapp
+        image: quay.io/cloud-bulldozer/curl:latest
+        command: ["sleep", "inf"]
+        env:
+        - name: WEBSERVER_HOSTNAME
+          value: webserver-{{.Replica}}-{{.Iteration}}
+        - name: WEBSERVER_PORT
+          value: "8080"
+        imagePullPolicy: IfNotPresent
+        securityContext:
+          privileged: false
+        startupProbe:
+          exec:
+            command:
+            - "/bin/sh"
+            - "-c"
- "curl ${WEBSERVER_HOSTNAME}:${WEBSERVER_PORT}" + periodSeconds: 1 + timeoutSeconds: 1 + failureThreshold: 600 + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: curl-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml new file mode 100644 index 0000000000..b9c904154a --- /dev/null +++ b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml @@ -0,0 +1,28 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + template: + metadata: + labels: + name: webserver-{{.Replica}}-{{.Iteration}} + spec: + nodeSelector: + node-role.kubernetes.io/worker: "" + containers: + - name: webserver + image: quay.io/cloud-bulldozer/sampleapp:latest + ports: + - containerPort: 8080 + protocol: TCP + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: webserver-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml new file mode 100644 index 0000000000..a569151b82 --- /dev/null +++ b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml @@ -0,0 +1,12 @@ +kind: Service +apiVersion: v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + selector: + name: webserver-{{.Replica}}-{{.Iteration}} + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 + type: ClusterIP diff --git a/contrib/perf/workloads/templates/udn-density/cudn_l2.yml b/contrib/perf/workloads/templates/udn-density/cudn_l2.yml new file mode 100644 index 0000000000..81be6ac9da --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/cudn_l2.yml @@ -0,0 +1,14 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: l2-network-{{.Iteration}} +spec: + namespaceSelector: + matchLabels: + cudn-scale: "{{.JobName}}-{{.Iteration}}" + network: + topology: Layer2 + layer2: + role: Primary + subnets: ["10.132.0.0/16"] \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/cudn_ns.yml b/contrib/perf/workloads/templates/udn-density/cudn_ns.yml new file mode 100644 index 0000000000..8f6387a459 --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/cudn_ns.yml @@ -0,0 +1,8 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: "{{.JobName}}-{{.Iteration}}" + labels: + k8s.ovn.org/primary-user-defined-network: "" + cudn-scale: "{{.JobName}}-{{.Iteration}}" \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/deployment-client.yml b/contrib/perf/workloads/templates/udn-density/deployment-client.yml new file mode 100644 index 0000000000..d153adfabe --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/deployment-client.yml @@ -0,0 +1,57 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: client-{{.Replica}} +spec: + replicas: 1 + selector: + matchLabels: + name: client-{{.Replica}} + template: + metadata: + labels: + name: client-{{.Replica}} + app: client + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: client + affinity: + nodeAffinity: + 
requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + - key: node-role.kubernetes.io/infra + operator: DoesNotExist + - key: node-role.kubernetes.io/workload + operator: DoesNotExist + containers: + - name: client-app + image: quay.io/cloud-bulldozer/curl:latest + command: ["sleep", "inf"] + resources: + requests: + memory: "10Mi" + cpu: "10m" + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + volumeMounts: + - name: podinfo + mountPath: /etc/podlabels + volumes: + - name: podinfo + downwardAPI: + items: + - path: "labels" + fieldRef: + fieldPath: metadata.labels + restartPolicy: Always + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/udn-density/udn_l2.yml b/contrib/perf/workloads/templates/udn-density/udn_l2.yml new file mode 100644 index 0000000000..fe0c222dd6 --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/udn_l2.yml @@ -0,0 +1,10 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: UserDefinedNetwork +metadata: + name: l2-network-{{.Iteration}} +spec: + topology: Layer2 + layer2: + role: Primary + subnets: ["10.132.0.0/16"] \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/udn_l3.yml b/contrib/perf/workloads/templates/udn-density/udn_l3.yml new file mode 100644 index 0000000000..0a8de7688d --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/udn_l3.yml @@ -0,0 +1,13 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: UserDefinedNetwork +metadata: + name: l3-network-{{.Iteration}} +spec: + topology: Layer3 + layer3: + role: Primary + subnets: + - cidr: 10.132.0.0/16 + hostSubnet: 24 + mtu: 1300 diff --git a/contrib/perf/workloads/udn-density-l2-noPods.yml b/contrib/perf/workloads/udn-density-l2-noPods.yml new file mode 100644 index 0000000000..310f3c87dd --- /dev/null +++ b/contrib/perf/workloads/udn-density-l2-noPods.yml @@ -0,0 +1,48 @@ +--- +global: + measurements: + - name: podLatency + - name: pprof + pprofInterval: 1m + pprofDirectory: pprof-data + pprofTargets: + - name: ovnkube-controller + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/profile?seconds=30 + - name: ovnkube-control-plane + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/profile?seconds=30 + - name: ovnkube-controller-heap + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/heap?seconds=30 + - name: ovnkube-control-plane-heap + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/heap?seconds=30 +jobs: + - name: udn-density-l2-nopods + jobIterations: 200 + qps: 10 + burst: 10 + namespacedIterations: true + namespace: udn-density-l2-nopods + waitWhenFinished: true + podWait: false + preLoadImages: false + preLoadPeriod: 2m + churnConfig: + percent: 10 + cycles: 5 + delay: 2m + namespaceLabels: + security.openshift.io/scc.podSecurityLabelSync: false + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged + k8s.ovn.org/primary-user-defined-network: "" + objects: + - objectTemplate: workloads/templates/udn-density/udn_l2.yml + replicas: 1 diff --git a/contrib/perf/workloads/udn-density-l2-pods.yml b/contrib/perf/workloads/udn-density-l2-pods.yml new file mode 100644 index 0000000000..d6ba0170cd --- 
/dev/null
+++ b/contrib/perf/workloads/udn-density-l2-pods.yml
@@ -0,0 +1,50 @@
+---
+global:
+  measurements:
+    - name: podLatency
+    - name: pprof
+      pprofInterval: 1m
+      pprofDirectory: pprof-data
+      pprofTargets:
+        - name: ovnkube-controller
+          namespace: "ovn-kubernetes"
+          labelSelector: {app: ovnkube-node}
+          url: http://localhost:9410/debug/pprof/profile?seconds=30
+        - name: ovnkube-control-plane
+          namespace: "ovn-kubernetes"
+          labelSelector: {name: ovnkube-control-plane}
+          url: http://localhost:9411/debug/pprof/profile?seconds=30
+        - name: ovnkube-controller-heap
+          namespace: "ovn-kubernetes"
+          labelSelector: {app: ovnkube-node}
+          url: http://localhost:9410/debug/pprof/heap?seconds=30
+        - name: ovnkube-control-plane-heap
+          namespace: "ovn-kubernetes"
+          labelSelector: {name: ovnkube-control-plane}
+          url: http://localhost:9411/debug/pprof/heap?seconds=30
+jobs:
+  - name: udn-density-l2-pods
+    jobIterations: 100
+    qps: 10
+    burst: 10
+    namespacedIterations: true
+    namespace: udn-density-l2-pods
+    waitWhenFinished: true
+    podWait: false
+    preLoadImages: false
+    preLoadPeriod: 2m
+    churnConfig:
+      percent: 10
+      cycles: 5
+      delay: 2m
+    namespaceLabels:
+      security.openshift.io/scc.podSecurityLabelSync: false
+      pod-security.kubernetes.io/enforce: privileged
+      pod-security.kubernetes.io/audit: privileged
+      pod-security.kubernetes.io/warn: privileged
+      k8s.ovn.org/primary-user-defined-network: ""
+    objects:
+      - objectTemplate: workloads/templates/udn-density/udn_l2.yml
+        replicas: 1
+      - objectTemplate: workloads/templates/udn-density/deployment-client.yml
+        replicas: 1
diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh
index ecdc231785..15ad3b1e25 100755
--- a/dist/images/daemonset.sh
+++ b/dist/images/daemonset.sh
@@ -69,7 +69,7 @@ OVN_EGRESSFIREWALL_ENABLE=
 OVN_EGRESSQOS_ENABLE=
 OVN_EGRESSSERVICE_ENABLE=
 OVN_MULTI_NETWORK_ENABLE=
-OVN_NETWORK_SEGMENTATION_ENABLE=
+OVN_NETWORK_SEGMENTATION_ENABLE="false"
 OVN_NETWORK_CONNECT_ENABLE=
 OVN_PRE_CONF_UDN_ADDR_ENABLE=
 OVN_DYNAMIC_UDN_ALLOCATION=
@@ -78,6 +78,7 @@ OVN_ROUTE_ADVERTISEMENTS_ENABLE=
 OVN_EVPN_ENABLE=
 OVN_ADVERTISE_DEFAULT_NETWORK=
 OVN_ADVERTISED_UDN_ISOLATION_MODE=
+OVN_NO_OVERLAY_ENABLE=
 OVN_V4_JOIN_SUBNET=""
 OVN_V6_JOIN_SUBNET=""
 OVN_V4_MASQUERADE_SUBNET=""
@@ -93,6 +94,7 @@ OVN_IPFIX_CACHE_ACTIVE_TIMEOUT=""
 OVN_HOST_NETWORK_NAMESPACE=""
 OVN_EX_GW_NETWORK_INTERFACE=""
 OVNKUBE_NODE_MGMT_PORT_NETDEV=""
+OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME=""
 OVNKUBE_CONFIG_DURATION_ENABLE=
 OVNKUBE_METRICS_SCALE_ENABLE=
 OVN_STATELESS_NETPOL_ENABLE="false"
@@ -104,6 +106,8 @@ OVN_NETWORK_QOS_ENABLE=
 OVN_ENABLE_DNSNAMERESOLVER="false"
 OVN_NOHOSTSUBNET_LABEL=""
 OVN_DISABLE_REQUESTEDCHASSIS="false"
+OVN_ALLOW_ICMP_NETPOL="false"
+
 # IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test,
 # it will render only the parts in ovn-setup.yaml related to RBAC permissions.
 IN_UPGRADE=
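The option-parsing hunks that follow wire several new daemonset.sh flags through to the templates. The script splits each argument on '=' into $PARAM and $VALUE, so the new knobs are passed in --flag=value form; --metrics-port and --mgmt-port-vfs-count fall back to defaults (9476 and 1) when omitted. A hedged sketch of a combined invocation, with a placeholder image tag and example values rather than recommendations:

    ./daemonset.sh --image=ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu:master \
        --no-overlay-enable=true \
        --allow-icmp-netpol=false \
        --mgmt-port-vfs-count=2 \
        --metrics-port=9476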
@@ -301,6 +305,9 @@ while [ "$1" != "" ]; do
         --advertised-udn-isolation-mode)
             OVN_ADVERTISED_UDN_ISOLATION_MODE=$VALUE
             ;;
+        --no-overlay-enable)
+            OVN_NO_OVERLAY_ENABLE=$VALUE
+            ;;
         --egress-service-enable)
             OVN_EGRESSSERVICE_ENABLE=$VALUE
             ;;
@@ -352,6 +359,9 @@
         --ovnkube-node-mgmt-port-dp-resource-name)
             OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME=$VALUE
             ;;
+        --mgmt-port-vfs-count)
+            MGMT_PORT_VFS_COUNT=$VALUE
+            ;;
         --ovnkube-config-duration-enable)
             OVNKUBE_CONFIG_DURATION_ENABLE=$VALUE
             ;;
@@ -394,6 +404,9 @@
         --enable-dnsnameresolver)
             OVN_ENABLE_DNSNAMERESOLVER=$VALUE
             ;;
+        --allow-icmp-netpol)
+            OVN_ALLOW_ICMP_NETPOL=$VALUE
+            ;;
         --enable-observ)
             OVN_OBSERV_ENABLE=$VALUE
             ;;
@@ -403,9 +416,33 @@
         --no-hostsubnet-label)
             OVN_NOHOSTSUBNET_LABEL=$VALUE
             ;;
-        --ovn_disable_requestedchassis)
+        --ovn-disable-requestedchassis)
             OVN_DISABLE_REQUESTEDCHASSIS=$value
             ;;
+        --metrics-port)
+            METRICS_PORT=$VALUE
+            ;;
+        --dpuhost-cluster-net-cidr)
+            DPUHOST_CLUSTER_NET_CIDR=$VALUE
+            ;;
+        --dpuhost-cluster-svc-cidr)
+            DPUHOST_CLUSTER_SVC_CIDR=$VALUE
+            ;;
+        --dpuhost-cluster-k8s-apiserver)
+            DPUHOST_CLUSTER_K8S_APISERVER=$VALUE
+            ;;
+        --dpuhost-cluster-k8s-token)
+            DPUHOST_CLUSTER_K8S_TOKEN=$VALUE
+            ;;
+        --dpuhost-cluster-k8s-cacert-data)
+            DPUHOST_CLUSTER_K8S_CACERT_DATA=$VALUE
+            ;;
+        --dpuhost-cluster-k8s-token-file)
+            DPUHOST_CLUSTER_K8S_TOKEN_FILE=$VALUE
+            ;;
+        --dpuhost-cluster-k8s-cacert)
+            DPUHOST_CLUSTER_K8S_CACERT=$VALUE
+            ;;
         *)
             echo "WARNING: unknown parameter \"$PARAM\""
             exit 1
@@ -506,6 +543,8 @@
 ovn_advertise_default_network=${OVN_ADVERTISE_DEFAULT_NETWORK}
 echo "ovn_advertise_default_network: ${ovn_advertise_default_network}"
 ovn_advertised_udn_isolation_mode=${OVN_ADVERTISED_UDN_ISOLATION_MODE}
 echo "ovn_advertised_udn_isolation_mode: ${ovn_advertised_udn_isolation_mode}"
+ovn_no_overlay_enable=${OVN_NO_OVERLAY_ENABLE}
+echo "ovn_no_overlay_enable: ${ovn_no_overlay_enable}"
 ovn_hybrid_overlay_net_cidr=${OVN_HYBRID_OVERLAY_NET_CIDR}
 echo "ovn_hybrid_overlay_net_cidr: ${ovn_hybrid_overlay_net_cidr}"
 ovn_disable_snat_multiple_gws=${OVN_DISABLE_SNAT_MULTIPLE_GWS}
@@ -578,6 +617,10 @@
 ovn_ex_gw_networking_interface=${OVN_EX_GW_NETWORK_INTERFACE}
 echo "ovn_ex_gw_networking_interface: ${ovn_ex_gw_networking_interface}"
 ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV}
 echo "ovnkube_node_mgmt_port_netdev: ${ovnkube_node_mgmt_port_netdev}"
+ovnkube_node_mgmt_port_dp_resource_name=${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME}
+echo "ovnkube_node_mgmt_port_dp_resource_name: ${ovnkube_node_mgmt_port_dp_resource_name}"
+mgmt_port_vfs_count=${MGMT_PORT_VFS_COUNT:-1}
+echo "mgmt_port_vfs_count: ${mgmt_port_vfs_count}"
 ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE}
 echo "ovnkube_config_duration_enable: ${ovnkube_config_duration_enable}"
 ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE}
@@ -615,6 +658,9 @@ echo "ovn_network_qos_enable: ${ovn_network_qos_enable}"
 ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER}
 echo "ovn_enable_dnsnameresolver: ${ovn_enable_dnsnameresolver}"
+ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL}
+echo "ovn_allow_icmp_netpol: ${ovn_allow_icmp_netpol}"
+
 ovn_observ_enable=${OVN_OBSERV_ENABLE}
 echo "ovn_observ_enable: ${ovn_observ_enable}"
@@ -662,6 +708,7 @@ ovn_image=${ovnkube_image} \
 ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \
 ovn_evpn_enable=${ovn_evpn_enable} \
 ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \
+
ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ @@ -722,6 +769,7 @@ ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ @@ -787,9 +835,14 @@ ovn_image=${image} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ + ovnkube_node_mgmt_port_dp_resource_name=${ovnkube_node_mgmt_port_dp_resource_name} \ + mgmt_port_vfs_count=${mgmt_port_vfs_count} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_network_qos_enable=${ovn_network_qos_enable} \ metrics_ip=${metrics_ip} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ + ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovnkube_app_name=ovnkube-node-dpu-host \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node-dpu-host.yaml @@ -830,6 +883,7 @@ ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ @@ -846,6 +900,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ ovn_nohostsubnet_label=${ovn_nohostsubnet_label} \ ovn_disable_requestedchassis=${ovn_disable_requestedchassis} \ @@ -888,6 +943,7 @@ ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ @@ -901,6 +957,7 @@ ovn_image=${ovnkube_image} \ ovn_v6_transit_subnet=${ovn_v6_transit_subnet} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ metrics_ip=${metrics_ip} \ @@ -976,6 +1033,7 @@ ovn_image=${ovnkube_image} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} 
\ @@ -1008,10 +1066,107 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml +# ovnkube-single-node-zone-dpu +dpuhost_cluster_net_cidr=${DPUHOST_CLUSTER_NET_CIDR:-"10.244.0.0/16/24"} +dpuhost_cluster_svc_cidr=${DPUHOST_CLUSTER_SVC_CIDR:-"10.96.0.0/16"} +dpuhost_cluster_k8s_apiserver=${DPUHOST_CLUSTER_K8S_APISERVER:-"https://172.25.0.2:6443"} +dpuhost_cluster_k8s_token=${DPUHOST_CLUSTER_K8S_TOKEN:-""} +dpuhost_cluster_k8s_cacert_data=${DPUHOST_CLUSTER_K8S_CACERT_DATA:-""} +dpuhost_cluster_k8s_token_file=${DPUHOST_CLUSTER_K8S_TOKEN_FILE:-""} +dpuhost_cluster_k8s_cacert=${DPUHOST_CLUSTER_K8S_CACERT:-""} +mtu=${OVN_MTU:-1400} +metrics_port=${METRICS_PORT:-9476} +echo "dpuhost_cluster_net_cidr: ${dpuhost_cluster_net_cidr}" +echo "dpuhost_cluster_svc_cidr: ${dpuhost_cluster_svc_cidr}" +echo "dpuhost_cluster_k8s_apiserver: ${dpuhost_cluster_k8s_apiserver}" +echo "dpuhost_cluster_k8s_token: ${dpuhost_cluster_k8s_token}" +echo "dpuhost_cluster_k8s_cacert_data: ${dpuhost_cluster_k8s_cacert_data}" +echo "dpuhost_cluster_k8s_token_file: ${dpuhost_cluster_k8s_token_file}" +echo "dpuhost_cluster_k8s_cacert: ${dpuhost_cluster_k8s_cacert}" +echo "mtu: ${mtu}" +echo "metrics_port: ${metrics_port}" + +ovn_image=${ovnkube_image} \ + ovn_image_pull_policy=${image_pull_policy} \ + ovn_unprivileged_mode=${ovn_unprivileged_mode} \ + ovn_gateway_mode=${ovn_gateway_mode} \ + ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_loglevel_nb=${ovn_loglevel_nb} ovn_loglevel_sb=${ovn_loglevel_sb} \ + ovn_northd_backoff_interval=${ovn_northd_backoff_interval} \ + ovn_loglevel_northd=${ovn_loglevel_northd} \ + ovnkube_node_loglevel=${node_loglevel} \ + ovn_loglevel_controller=${ovn_loglevel_controller} \ + ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ + ovnkube_logfile_maxbackups=${ovnkube_logfile_maxbackups} \ + ovnkube_logfile_maxage=${ovnkube_logfile_maxage} \ + ovnkube_libovsdb_client_logfile=${ovnkube_libovsdb_client_logfile} \ + ovnkube_config_duration_enable=${ovnkube_config_duration_enable} \ + ovnkube_metrics_scale_enable=${ovnkube_metrics_scale_enable} \ + metrics_ip=${metrics_ip} \ + ovn_hybrid_overlay_net_cidr=${ovn_hybrid_overlay_net_cidr} \ + ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \ + ovn_disable_snat_multiple_gws=${ovn_disable_snat_multiple_gws} \ + ovn_disable_forwarding=${ovn_disable_forwarding} \ + ovn_encap_port=${ovn_encap_port} \ + ovn_disable_pkt_mtu_check=${ovn_disable_pkt_mtu_check} \ + ovn_v4_join_subnet=${ovn_v4_join_subnet} \ + ovn_v6_join_subnet=${ovn_v6_join_subnet} \ + ovn_v4_masquerade_subnet=${ovn_v4_masquerade_subnet} \ + ovn_v6_masquerade_subnet=${ovn_v6_masquerade_subnet} \ + ovn_multicast_enable=${ovn_multicast_enable} \ + ovn_admin_network_policy_enable=${ovn_admin_network_policy_enable} \ + ovn_egress_ip_enable=${ovn_egress_ip_enable} \ + ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \ + ovn_egress_firewall_enable=${ovn_egress_firewall_enable} \ + ovn_egress_qos_enable=${ovn_egress_qos_enable} \ + ovn_multi_network_enable=${ovn_multi_network_enable} \ + ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ + 
ovn_network_connect_enable=${ovn_network_connect_enable} \
+  ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \
+  ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \
+  ovn_egress_service_enable=${ovn_egress_service_enable} \
+  ovn_ssl_en=${ovn_ssl_en} \
+  ovn_remote_probe_interval=${ovn_remote_probe_interval} \
+  ovn_monitor_all=${ovn_monitor_all} \
+  ovn_ofctrl_wait_before_clear=${ovn_ofctrl_wait_before_clear} \
+  ovn_enable_lflow_cache=${ovn_enable_lflow_cache} \
+  ovn_lflow_cache_limit=${ovn_lflow_cache_limit} \
+  ovn_lflow_cache_limit_kb=${ovn_lflow_cache_limit_kb} \
+  ovn_netflow_targets=${ovn_netflow_targets} \
+  ovn_sflow_targets=${ovn_sflow_targets} \
+  ovn_ipfix_targets=${ovn_ipfix_targets} \
+  ovn_ipfix_sampling=${ovn_ipfix_sampling} \
+  ovn_ipfix_cache_max_flows=${ovn_ipfix_cache_max_flows} \
+  ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \
+  ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \
+  ovn_acl_logging_rate_limit=${ovn_acl_logging_rate_limit} \
+  ovn_empty_lb_events=${ovn_empty_lb_events} \
+  ovn_enable_interconnect=${ovn_enable_interconnect} \
+  ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \
+  ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \
+  ovn_network_qos_enable=${ovn_network_qos_enable} \
+  ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \
+  ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \
+  ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \
+  ovn_observ_enable=${ovn_observ_enable} \
+  ovn_no_overlay_enable=${ovn_no_overlay_enable} \
+  mtu_value=${mtu} \
+  metrics_port=${metrics_port} \
+  dpuhost_cluster_net_cidr=${dpuhost_cluster_net_cidr} \
+  dpuhost_cluster_svc_cidr=${dpuhost_cluster_svc_cidr} \
+  dpuhost_cluster_k8s_apiserver=${dpuhost_cluster_k8s_apiserver} \
+  dpuhost_cluster_k8s_token=${dpuhost_cluster_k8s_token} \
+  dpuhost_cluster_k8s_cacert_data=${dpuhost_cluster_k8s_cacert_data} \
+  dpuhost_cluster_k8s_token_file=${dpuhost_cluster_k8s_token_file} \
+  dpuhost_cluster_k8s_cacert=${dpuhost_cluster_k8s_cacert} \
+  jinjanate ../templates/ovnkube-single-node-zone-dpu.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone-dpu.yaml
+
 ovn_image=${ovnkube_image} \
   ovn_image_pull_policy=${image_pull_policy} \
   ovn_unprivileged_mode=${ovn_unprivileged_mode} \
@@ -1052,6 +1207,7 @@ ovn_image=${ovnkube_image} \
   ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \
   ovn_evpn_enable=${ovn_evpn_enable} \
   ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \
+  ovn_no_overlay_enable=${ovn_no_overlay_enable} \
   ovn_ssl_en=${ovn_ssl_en} \
   ovn_remote_probe_interval=${ovn_remote_probe_interval} \
   ovn_monitor_all=${ovn_monitor_all} \
@@ -1081,6 +1237,7 @@ ovn_image=${ovnkube_image} \
   ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \
   ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \
   ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \
+  ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \
   ovn_observ_enable=${ovn_observ_enable} \
   enable_coredumps=${enable_coredumps} \
   metrics_ip=${metrics_ip} \
@@ -1141,16 +1298,19 @@ net_cidr=${net_cidr} svc_cidr=${svc_cidr} \
   host_network_namespace=${host_network_namespace} \
   in_upgrade=${in_upgrade} \
   advertise_default_network=${ovn_advertise_default_network} \
+  ovn_no_overlay_enable=${ovn_no_overlay_enable} \
 jinjanate ../templates/ovn-setup.yaml.j2 -o ${output_dir}/ovn-setup.yaml
ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ jinjanate ../templates/rbac-ovnkube-node.yaml.j2 -o ${output_dir}/rbac-ovnkube-node.yaml ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -1158,9 +1318,11 @@ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ +ovn_enable_interconnect=${ovn_enable_interconnect} \ jinjanate ../templates/rbac-ovnkube-master.yaml.j2 -o ${output_dir}/rbac-ovnkube-master.yaml cp ../templates/rbac-ovnkube-identity.yaml.j2 ${output_dir}/rbac-ovnkube-identity.yaml diff --git a/dist/images/ovndb-raft-functions.sh b/dist/images/ovndb-raft-functions.sh index 38396fed33..0db584a020 100644 --- a/dist/images/ovndb-raft-functions.sh +++ b/dist/images/ovndb-raft-functions.sh @@ -9,7 +9,7 @@ verify-ovsdb-raft() { exit 1 fi - replicas=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + replicas=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get statefulset -n ${ovn_kubernetes_namespace} ovnkube-db -o=jsonpath='{.spec.replicas}') if [[ ${replicas} -lt 3 || $((${replicas} % 2)) -eq 0 ]]; then echo "at least 3 nodes need to be configured, and it must be odd number of nodes" @@ -25,7 +25,7 @@ db_part_of_cluster() { local db=${2} local port=${3} echo "Checking if ${pod} is part of cluster" - init_ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + init_ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}') if [[ $? != 0 ]]; then echo "Unable to get ${pod} ip " @@ -51,7 +51,7 @@ cluster_exists() { local db=${1} local port=${2} - db_pods=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + db_pods=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-db[^ ]+') for db_pod in $db_pods; do @@ -62,7 +62,7 @@ cluster_exists() { done # if we get here there is no cluster, set init_ip and get out - init_ip="$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + init_ip="$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ovnkube-db-0 -o=jsonpath='{.status.podIP}')" if [[ $? 
!= 0 ]]; then return 1 @@ -89,7 +89,7 @@ check_and_apply_ovnkube_db_ep() { local port=${1} # return if ovn db service endpoint already exists - result=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + result=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get ep -n ${ovn_kubernetes_namespace} ovnkube-db 2>&1) test $? -eq 0 && return if ! echo ${result} | grep -q "NotFound"; then @@ -99,7 +99,7 @@ check_and_apply_ovnkube_db_ep() { # Get IPs of all ovnkube-db PODs ips=() for ((i = 0; i < ${replicas}; i++)); do - ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ovnkube-db-${i} -o=jsonpath='{.status.podIP}') if [[ ${ip} == "" ]]; then break diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index aa81ecfa4e..50751aa3da 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -44,6 +44,8 @@ fi # OVN_DAEMONSET_VERSION - version match daemonset and image - v1.2.0 # K8S_TOKEN - the apiserver token. Automatically detected when running in a pod - v3 # K8S_CACERT - the apiserver CA. Automatically detected when running in a pod - v3 +# K8S_TOKEN_FILE - the apiserver token file. Automatically detected when running in a pod - v3 +# K8S_CACERT_DATA - the apiserver CA data. # OVN_CONTROLLER_OPTS - the options for ovn-ctl # OVN_NORTHD_OPTS - the options for the ovn northbound db # OVN_GATEWAY_MODE - the gateway mode (shared or local) - v3 @@ -96,6 +98,7 @@ fi # OVN_NORTHD_BACKOFF_INTERVAL - ovn northd backoff interval in ms (default 300) # OVN_ENABLE_SVC_TEMPLATE_SUPPORT - enable svc template support # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver support +# OVN_ALLOW_ICMP_NETPOL - allow ICMP and ICMPv6 regardless of network policy # OVN_OBSERV_ENABLE - enable observability for ovnkube # The argument to the command is the operation to be performed @@ -145,7 +148,7 @@ else fi # certs and private keys for k8s and OVN -K8S_CACERT=${K8S_CACERT:-/var/run/secrets/kubernetes.io/serviceaccount/ca.crt} +k8s_cacert=${K8S_CACERT:-/var/run/secrets/kubernetes.io/serviceaccount/ca.crt} ovn_ca_cert=/ovn-cert/ca-cert.pem ovn_nb_pk=/ovn-cert/ovnnb-privkey.pem @@ -326,12 +329,14 @@ ovn_enable_svc_template_support=${OVN_ENABLE_SVC_TEMPLATE_SUPPORT:-true} ovn_network_qos_enable=${OVN_NETWORK_QOS_ENABLE:-false} # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver support ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER:-false} +# OVN_ALLOW_ICMP_NETPOL - allow ICMP/ICMPv6 with network policy +ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL:-false} # OVN_OBSERV_ENABLE - enable observability for ovnkube ovn_observ_enable=${OVN_OBSERV_ENABLE:-false} # OVN_NOHOSTSUBNET_LABEL - node label indicating nodes managing their own network ovn_nohostsubnet_label=${OVN_NOHOSTSUBNET_LABEL:-""} # OVN_DISABLE_REQUESTEDCHASSIS - disable requested-chassis option during pod creation -# should be set to true when dpu nodes are in the cluster +# should be set to true when dpu nodes are in the cluster for OVN Central mode ovn_disable_requestedchassis=${OVN_DISABLE_REQUESTEDCHASSIS:-false} # external_ids:host-k8s-nodename is set on an Open_vSwitch enabled system if the ovnkube stack @@ -450,7 +455,7 @@ ready_to_start_node() { ovnkube_db_ep=$(get_ovnkube_zone_db_ep) echo "Getting the ${ovnkube_db_ep} ep" # See if ep is available 
   ...
-  IFS=" " read -a ovn_db_hosts <<<"$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \
+  IFS=" " read -a ovn_db_hosts <<<"$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \
     get ep -n ${ovn_kubernetes_namespace} ${ovnkube_db_ep} -o=jsonpath='{range .subsets[0].addresses[*]}{.ip}{" "}')"
   if [[ ${#ovn_db_hosts[@]} == 0 ]]; then
     return 1
@@ -632,6 +637,32 @@ check_health() {
   return 1
 }
+get_dpu_gw_options() {
+  # If ovn_gateway_opts or ovn_gateway_router_subnet are not set as environment variables, gather them from ovs settings
+  if [[ ${ovn_gateway_opts} == "" ]]; then
+    # get the gateway interface
+    gw_iface=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-interface | tr -d \")
+    if [[ ${gw_iface} == "" ]]; then
+      echo "Couldn't get OVN Gateway Interface from ovs external_ids setting"
+    else
+      ovn_gateway_opts="--gateway-interface=${gw_iface} "
+    fi
+
+    # get the gateway nexthop
+    gw_nexthop=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-nexthop | tr -d \")
+    if [[ ${gw_nexthop} == "" ]]; then
+      echo "Couldn't get OVN Gateway NextHop from ovs external_ids setting"
+    else
+      ovn_gateway_opts+="--gateway-nexthop=${gw_nexthop} "
+    fi
+  fi
+
+  # this is only required if the DPU and the DPU Host are in different subnets
+  if [[ ${ovn_gateway_router_subnet} == "" ]]; then
+    ovn_gateway_router_subnet=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-router-subnet | tr -d \")
+  fi
+}
+
 display_file() {
   if [[ -f $3 ]]; then
     echo "====================== $1 pid "
@@ -837,7 +868,7 @@ set_ovnkube_db_ep() {
   ovnkube_db_ep=$(get_ovnkube_zone_db_ep)
   echo "=============== setting ${ovnkube_db_ep} endpoints to ${ips[@]}"
   # create a new endpoint for the headless onvkube-db service without selectors
-  kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} apply -f - <
+   **DOCA-Host** -> **Linux** -> **x86_64** -> **doca-ofed** -> {*Your-OS-Distribution*} -> {*Your-OS-Version*} -> {*Preferred installation type*}, then follow the instructions displayed below the form to install the package.
+
+   Note: Some extra packages may be required depending on your distribution.
+
+2. Make sure that **bfb-install** exists after the above step.
+3. Start **rshim** by running either “systemctl enable --now rshim” or simply “rshim”.
+4. Make sure that the `/dev/rshim*` device file shows up after the above step.
+
+5. Update the BFB and firmware for Mellanox BlueField DPUs
+    * Go to [developer.nvidia.com/doca-downloads](https://developer.nvidia.com/doca-downloads), in the “Select” form, choose **BlueField** -> **BF-Bundle** -> **Ubuntu** -> {*Version*} -> **BFB**, then click “Download” to start downloading.
+    * Upload the BFB package to the Host.
+    * On the Host, follow the instructions to install the BFB package. You may provide a config file to set up a password for the **ubuntu** account, in the following format: “**ubuntu_PASSWORD='$1……'**”
+    * An encrypted password can be generated with the command “**openssl passwd -1**”.
+    * Power cycle the Host (off and then on) so that it reboots into the newly installed software and firmware.
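+    * For example, a typical installation from the Host looks like the sketch below. This is illustrative only: the BFB file name and the password hash are placeholders, and `rshim0` assumes a single DPU in the system.
+
+      ```bash
+      # bf.cfg sets the password for the "ubuntu" account on the DPU (hash from openssl passwd -1 above)
+      user@fedora: echo "ubuntu_PASSWORD='<encrypted-password>'" > bf.cfg
+      # stream the BFB bundle and config to the DPU over the rshim device
+      user@fedora: bfb-install --bfb <bf-bundle>.bfb --config bf.cfg --rshim rshim0
+      ```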
+6. The DPU has two modes, DPU mode and NIC mode. DPU mode is required for this solution.
+    * Run the following commands on the Host to identify the current mode or update it to enable DPU mode:
+
+    ```bash
+    user@fedora: mlxconfig -d /dev/mst/ q INTERNAL_CPU_OFFLOAD_ENGINE
+    ENABLED(0)
+
+    # to configure BlueField 3:
+    user@fedora: mlxconfig -d /dev/mst/ s INTERNAL_CPU_OFFLOAD_ENGINE=0
+
+    # to configure BlueField 2:
+    user@fedora: mlxconfig -d /dev/mst/ s INTERNAL_CPU_PAGE_SUPPLIER=0 INTERNAL_CPU_ESWITCH_MANAGER=0 INTERNAL_CPU_IB_VPORT0=0 INTERNAL_CPU_OFFLOAD_ENGINE=0
+    ```
+
+7. Reboot.
+
+8. Optional: At this point you may follow this guide to enable OVS DOCA offload support: [docs.nvidia.com/doca/sdk/ovs-doca-hardware-acceleration/index.html](https://docs.nvidia.com/doca/sdk/ovs-doca-hardware-acceleration/index.html)
+
+9. On the Host, configure the desired number of VFs, then rename the first VF device so that it can be dedicated to and referenced by OVN-Kubernetes as its management port:
+
+    ```bash
+    user@fedora: echo ${num_of_desired_vfs} > /sys/class/net/${interface}/device/sriov_numvfs
+    user@fedora: ip link set ens1f0v0 down
+    user@fedora: ip link set ens1f0v0 name forOVN0
+    user@fedora: ip link set forOVN0 up
+    ```
+
+10. The BFB package installed earlier includes Open vSwitch (OVS), installed as a systemd service that is enabled by default. By default the DPU comes up with two bridges, ovsbr1 and ovsbr2, regardless of whether the ports are cabled. You may delete them and create your own bridges; just remember to add the uplink **p0/p1** and the x86 representor **pf0hpf/pf1hpf** to the new bridge. We will use a tool called minicom to log in to the DPU from the Host and configure OVS.
+
+    ```bash
+    #### run minicom on x86 host to login to the DPU via rshim interface
+    # minicom -D /dev/rshim0/console
+    #### login to DPU
+    user@ubuntu: ovs-vsctl show
+    c41c98ac-0159-4874-97d5-17a4d2647d70
+        Bridge ovsbr2
+            Port en3f1pf1sf0
+                Interface en3f1pf1sf0
+            Port p1
+                Interface p1
+            Port pf1hpf
+                Interface pf1hpf
+            Port ovsbr2
+                Interface ovsbr2
+                    type: internal
+        Bridge ovsbr1
+            Port p0
+                Interface p0
+            Port ovsbr1
+                Interface ovsbr1
+                    type: internal
+            Port pf0hpf
+                Interface pf0hpf
+            Port en3f0pf0sf0
+                Interface en3f0pf0sf0
+        ovs_version: "3.2.1005"
+    user@ubuntu: ovs-vsctl del-br ovsbr1
+    user@ubuntu: ovs-vsctl del-br ovsbr2
+    user@ubuntu: ovs-vsctl add-br brp0
+    user@ubuntu: ovs-vsctl add-port brp0 p0
+    user@ubuntu: ovs-vsctl add-port brp0 pf0hpf
+    ```
+
+11. Now that the OVS bridge is created with the proper port configuration, we need to configure the IP address of the bridge. Typically this involves moving the IP address that was already configured on the **en3f0pf0sf0** interface to the **brp0** bridge. This IP address will be used for Geneve encapsulation (ovn-encap-ip), and therefore we must configure the OVS bridge so that OVN is aware of it. Additionally, take note of the default gateway route on the Host (10.1.65.1 in this example). We will need to configure this in the OVS bridge as well so that OVN will use it as its default gateway.
+
+    ```bash
+    #### run minicom on x86 host to login to the DPU via rshim interface
+    # minicom -D /dev/rshim0/console
+    #### login to DPU
+    user@ubuntu: ip addr del 10.1.65.155/24 dev en3f0pf0sf0
+    user@ubuntu: ip addr add 10.1.65.155/24 dev brp0
+    #### make brp0 the default route interface
+    user@ubuntu: ip r add default via 10.1.65.1 dev brp0
+    #### configure OVS
+    user@ubuntu: ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
+    user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-encap-ip="10.1.65.155"
+    user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-interface="brp0"
+    user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-nexthop="10.1.65.1"
+    #### configure the hostname of the Host as it will appear in the Host Kubernetes Cluster
+    user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:host-k8s-nodename="host-worker-1"
+    ```
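+12. Optionally, sanity check the OVS configuration before moving on. This is an illustrative check using standard `ovs-vsctl` commands; the exact output formatting may differ:
+
+    ```bash
+    #### confirm hardware offload is enabled
+    user@ubuntu: ovs-vsctl get Open_vSwitch . other_config:hw-offload
+    "true"
+    #### confirm the external_ids consumed by OVN-Kubernetes are all set
+    user@ubuntu: ovs-vsctl get Open_vSwitch . external_ids
+    {host-k8s-nodename=host-worker-1, ovn-encap-ip="10.1.65.155", ovn-gw-interface=brp0, ovn-gw-nexthop="10.1.65.1"}
+    ```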
+## Deploying OVN-Kubernetes
+
+OVN-Kubernetes version 1.3 or later is required for DPU support. At the time of this writing, 1.3 is in an alpha state. The following steps should be done from a jumphost that has kubeconfig access to both the Host and DPU clusters.
+
+1. Build or download the OVN-Kubernetes container images. Refer to this [image build guide](../developer-guide/image-build.md) for how to build or obtain the artifacts.
+2. Upload the images to a container registry that is reachable by both clusters.
+3. Label all Host nodes that have a DPU with **k8s.ovn.org/dpu-host=""**
+4. Label all DPU nodes with **k8s.ovn.org/dpu=""**
+5. `git clone https://github.com/ovn-kubernetes/ovn-kubernetes` to obtain the helm charts.
+6. Follow the [upstream installation guide](../installation/launching-ovn-kubernetes-with-dpu.md) to configure the helm charts correctly and install OVN-Kubernetes to the Host and DPU.
+
+## Install SR-IOV Device Plugin
+
+OVN-Kubernetes relies on the SR-IOV device plugin to provision VFs for the pods. Once a VF is allocated, OVN-Kubernetes plugs it into the pod's network namespace on the Host and, on the DPU side, plugs the corresponding VF representor into OVS. From the jumphost, follow these steps using the kubeconfig of the Host Kubernetes cluster.
+
+1. `git clone https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin`. Use at least tag v3.11.0.
+2. Configure the SR-IOV resource that OVN-Kubernetes will use. Note that `pfNames` selects VFs 1-7, leaving VF 0 (renamed to forOVN0 earlier) for the management port. Replace the content of `deployments/configMap.yaml` with:
+
+    ```yaml
+    apiVersion: v1
+    kind: ConfigMap
+    metadata:
+      name: sriovdp-config
+      namespace: kube-system
+    data:
+      config.json: |
+        {
+            "resourceList": [
+                {
+                    "resourceName": "asap2_vf",
+                    "resourcePrefix": "nvidia.com",
+                    "excludeTopology": true,
+                    "selectors": {
+                        "vendors": [ "15b3" ],
+                        "devices": [ "101e" ],
+                        "drivers": [ "mlx5_core" ],
+                        "pfNames": [ "ens1f0np0#1-7" ]
+                    }
+                }
+            ]
+        }
+    ```
+
+3. `kubectl create -f deployments/configMap.yaml`
+4. `kubectl create -f deployments/sriovdp-daemonset.yaml`
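+Once the device plugin pods are running, the new resource should appear in the node's allocatable resources. As a quick sanity check (illustrative; `host-worker-1` is the example node name used earlier), the node should report `nvidia.com/asap2_vf` with a count of 7, matching the seven VFs selected by `ens1f0np0#1-7`:
+
+```bash
+user@jumphost: kubectl get node host-worker-1 -o jsonpath='{.status.allocatable}'
+```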
+## Install Multus
+
+Multus is needed to pass the VF allocated by the SR-IOV device plugin to OVN-Kubernetes as the DeviceID. Furthermore, in addition to the primary network, OVN-Kubernetes supports Secondary Networks using Secondary Network Attachment Definitions (NADs) or Secondary User Defined Networks (UDNs). In simpler terms, a pod can have a VF for its default gateway interface, as well as one or more VFs for secondary networks. To leverage this capability, Multus needs to be installed. Follow these steps on the jumphost while using the Host kubeconfig.
+
+1. Download the deployment spec for Multus. Use at least tag v4.2.3:
+
+    ```bash
+    user@jumphost: curl -LO https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/master/deployments/multus-daemonset.yml
+    ```
+
+2. Create the Multus cni-conf file that will be used with OVN-Kubernetes:
+
+    ```bash
+    user@jumphost: cat > cni-conf.json << 'EOF'
+    {
+      "name": "multus-cni-network",
+      "type": "multus",
+      "logLevel": "verbose",
+      "logFile": "/var/log/multus.log",
+      "namespaceIsolation": false,
+      "multusNamespace": "default",
+      "clusterNetwork": "ovn-primary",
+      "confDir": "/etc/cni/net.d",
+      "readinessindicatorfile": "/etc/cni/net.d/10-ovn-kubernetes.conf",
+      "kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig"
+    }
+    EOF
+    ```
+
+3. Create the configMap using the cni-conf file:
+
+    ```bash
+    user@jumphost: kubectl -n kube-system delete configmap multus-cni-config --ignore-not-found=true
+    user@jumphost: kubectl -n kube-system create configmap multus-cni-config --from-file=cni-conf.json
+    ```
+
+4. Edit the previously downloaded `multus-daemonset.yml`:
+
+    ```yaml
+    ...
+    spec:
+      ...
+      template:
+        ...
+        spec:
+          ...
+          containers:
+          - name: kube-multus
+            image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot
+            command: ["/thin_entrypoint"]
+            args:
+            - "/tmp/multus-conf/00-multus.conf" # Modify multus-conf-file
+          ...
+          volumes:
+          ...
+          - name: multus-cfg
+            configMap:
+              name: multus-cni-config
+              items:
+              - key: cni-conf.json
+                path: 00-multus.conf # Modify to 00-multus.conf
+    ```
+
+5. Create the Multus Daemonset.
+
+    ```bash
+    kubectl apply -f multus-daemonset.yml
+    ```
+
+## Validating the Setup
+
+Now that everything is configured, it is time to create a pod and verify that it is properly offloaded. The last step before starting a pod is to create the Network Attachment Definition (NAD) so that OVN-Kubernetes is invoked as the CNI and uses VFs for the default network. Additionally, Primary or Secondary User Defined Networks (UDNs) could also be created, but for the purpose of this guide we will focus on the Cluster Default Network (CDN). Follow these steps from the jumphost with the Host kubeconfig to create the NAD and a pod to test with.
+
+1. Configure the primary default NAD. Notice the `resourceName` annotation is set to the SR-IOV device plugin resource we previously configured:
+
+    ```bash
+    cat <...
+    ```
+
+3. **Binary collection** happens during log export. The `export-kind-logs.sh` script
+   searches all containers for the crashed binary and copies it alongside the coredump.
+
+4. **Artifacts are uploaded** to GitHub Actions and can be downloaded from the job's
+   artifacts section.
+
+### Downloading Artifacts
+
+After a CI job completes, download the `kind-logs-*` artifact from the GitHub Actions
+job page. Extract it to find:
+
+```
+/tmp/kind/logs/coredumps/
+├── core.29132.ovnkube.ovn-worker.6   # Coredump file
+└── binaries/
+    └── ovnkube                       # Matching binary
+```
+
+### Debugging with Delve
+
+Use the [Delve](https://github.com/go-delve/delve) debugger for post-mortem analysis.
+
+1. **Create a path substitution file** (`dlv.init`) to map build paths to your local
+   source checkout:
+
+    ```
+    config substitute-path /workspace/ovn-kubernetes/go-controller /path/to/your/ovn-kubernetes/go-controller
+    config substitute-path /usr/local/go /path/to/your/go/installation
+    ```
+
+    The build paths can be found by running `dlv core` without the init file and
+    using the `list` command - it will show the paths it's looking for.
+
+2. **Start the debugger**:
+
+    ```bash
+    dlv core ./binaries/ovnkube ./core.29132.ovnkube.ovn-worker.6 --init dlv.init
+    ```
+
+3. 
**Explore the crash**: + + ``` + (dlv) goroutines # List all goroutines + (dlv) goroutine # Switch to a specific goroutine + (dlv) bt # Show backtrace + (dlv) frame # Select stack frame + (dlv) list # Show source code at current location + (dlv) locals # Show local variables + (dlv) print # Print variable value + ``` + +### Debugging C Binaries with GDB (e.g. FRR) + +Some coredumps come from C binaries such as FRR's `bgpd` or `zebra`, not from Go +binaries. These require GDB instead of Delve. + +The key challenge is matching the exact container image that produced the coredump, +since GDB needs the same binary and shared libraries to resolve symbols. + +1. **Identify the image that produced the coredump.** Check the CI job logs for the + `docker run` command that started the crashed process. For example, the external + FRR container may use `quay.io/frrouting/frr:9.1.0` (deployed via + `contrib/kind-common.sh`). + +2. **Run the same image with the coredumps mounted:** + + ```bash + docker run --platform linux/amd64 -it \ + -v /path/to/coredumps:/coredumps \ + quay.io/frrouting/frr:9.1.0 sh + ``` + + Using `--platform linux/amd64` is important if the coredump was generated on + x86_64 and you are on a different architecture (e.g. Apple Silicon). + +3. **Install GDB and debug symbols inside the container:** + + ```bash + apk add gdb frr-dbg musl-dbg + ``` + + The exact package names depend on the base distro. Alpine uses `-dbg` suffix. + +4. **Run GDB:** + + ```bash + gdb /usr/lib/frr/bgpd /coredumps/core.38907.bgpd.ovn-control-plane.11 + ``` + +5. **Explore the crash:** + + ``` + (gdb) bt # Show backtrace + (gdb) thread apply all bt # Backtraces for all threads + (gdb) frame # Select stack frame + (gdb) info locals # Show local variables + (gdb) info args # Show function arguments + (gdb) print *some_ptr # Dereference and print a pointer + (gdb) info sharedlibrary # Check if all shared libraries are resolved + ``` + +6. **Troubleshooting missing symbols.** If the backtrace shows `??` for most frames: + - Run `info sharedlibrary` in GDB. Lines marked `(*)` are missing debug info. + - Verify you are using the exact same image tag that produced the coredump. + Floating tags (like `latest` or even `9.1.0`) may have been rebuilt with updated + packages. If the shared library versions don't match (GDB will print warnings + about missing `.so` files), you need the exact image digest from CI. + - Install additional `-dbg` packages for libraries that appear in the backtrace. + +### Local Development + +To enable coredump collection in a local KIND cluster: + +```bash +ENABLE_COREDUMPS=true ./contrib/kind.sh +``` + +To manually export logs with coredump binaries: + +```bash +./contrib/export-kind-logs.sh /path/to/output +``` diff --git a/docs/features/user-defined-networks/user-defined-networks.md b/docs/features/user-defined-networks/user-defined-networks.md index f2e7348eb7..ac94e519f3 100644 --- a/docs/features/user-defined-networks/user-defined-networks.md +++ b/docs/features/user-defined-networks/user-defined-networks.md @@ -124,6 +124,7 @@ of end users. Currently supported topology types for a given network include: `Layer3`: is a topology type wherein the pods or VMs are connected to their node’s local router and all these routers are then connected to the distributed switch across nodes. 
+ * Each pod would hence get an IP from the node's subnet segment * When in doubt which topology to use go with layer3 which is the same topology as the cluster default network @@ -142,6 +143,7 @@ network (grey color) which is only used for kubelet healthchecks. `Layer2`: is a topology type wherein the pods or VMs are all connected to the same layer2 flat switch. + * Usually used when the applications deployed expect a layer2 type network connection (Perhaps applications want a single broadcast domain, latency sensitive, use proprietary L2 protocols) * Common in Virtualization world for seamless migration of the VM since @@ -149,7 +151,7 @@ same layer2 flat switch. during live migration * Can be of type `primary` or `secondary` -![l2-UDN](images/L2DeepDive-2segments.png) +![l2-UDN](images/L2DeepDive-2segments.jpg) Here we can see a blue and green P-UDN. On node1, pod1 is part of green UDN and pod2 is part of blue UDN. They each have a udn-0 interface that is attached to @@ -160,6 +162,7 @@ network (grey color) which is only used for kubelet healthchecks. `Localnet`: is a topology type wherein the pods or VMs attached to a localnet network on the overlay can egress to the provider’s physical network + * without SNATing to nodeIPs… preserves the podIPs * podIPs can be on the same subnet as the provider’s VLAN * VLAN IDs can be used to mark the traffic coming from the localnet for diff --git a/docs/images/ovnk-accelerated.excalidraw b/docs/images/ovnk-accelerated.excalidraw new file mode 100644 index 0000000000..044d3720cd --- /dev/null +++ b/docs/images/ovnk-accelerated.excalidraw @@ -0,0 +1,1149 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "3PSV2IpmimdHV3TGHwCnL", + "type": "rectangle", + "x": 469.71484375, + "y": 230.94921875, + "width": 975.9257812499999, + "height": 370.6484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 3 + }, + "seed": 1323383732, + "version": 590, + "versionNonce": 1650696134, + "isDeleted": false, + "boundElements": [], + "updated": 1769213052357, + "link": null, + "locked": false + }, + { + "id": "Hhj9Yei2KIDBGtqDrnLuE", + "type": "text", + "x": 494.09765625, + "y": 233.42578125000003, + "width": 676.264217346056, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 234782220, + "version": 297, + "versionNonce": 613304933, + "isDeleted": false, + "boundElements": [], + "updated": 1769807221667, + "link": null, + "locked": false, + "text": "Kubernetes Worker Node", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubernetes Worker Node", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "AFBhMx_FMLkpvpoVM55si", + "type": "diamond", + "x": 647.9921875, + "y": 447.890625, + "width": 186.53515625000003, + "height": 124.37109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + 
"roundness": { + "type": 2 + }, + "seed": 1109773108, + "version": 938, + "versionNonce": 195622373, + "isDeleted": true, + "boundElements": [], + "updated": 1769806380801, + "link": null, + "locked": false + }, + { + "id": "d21SDy11vytpGym8ZRv9Z", + "type": "text", + "x": 699.8398056030273, + "y": 492.4833984375, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 265828788, + "version": 857, + "versionNonce": 401966827, + "isDeleted": true, + "boundElements": [], + "updated": 1769806380801, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "AFBhMx_FMLkpvpoVM55si", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MMRCgxZ2SkPQRTbazh5Mt", + "type": "diamond", + "x": 826.091796875, + "y": 417.40234375, + "width": 186.71093749999997, + "height": 160.96484375000003, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": { + "type": 2 + }, + "seed": 1941094156, + "version": 1260, + "versionNonce": 249372837, + "isDeleted": true, + "boundElements": [], + "updated": 1769806362195, + "link": null, + "locked": false + }, + { + "id": "fBzHO81d67o5rHJ7nhi37", + "type": "text", + "x": 893.0474243164062, + "y": 462.6435546875, + "width": 52.4442138671875, + "height": 70, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 262077836, + "version": 1221, + "versionNonce": 830367787, + "isDeleted": true, + "boundElements": [], + "updated": 1769806362195, + "link": null, + "locked": false, + "text": "Pod\nB", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MMRCgxZ2SkPQRTbazh5Mt", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "tYzSvRmwfQQNkfrVTe1P5", + "type": "rectangle", + "x": 710.88671875, + "y": 671.09375, + "width": 234.94921875, + "height": 144.21484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": { + "type": 3 + }, + "seed": 198185882, + "version": 708, + "versionNonce": 962154202, + "isDeleted": false, + "boundElements": [ + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow" + } + ], + "updated": 1769213059064, + "link": null, + "locked": false + }, + { + "id": "3d5gOfYF1IUhTLFt9bgVo", + "type": "rectangle", + "x": 1124.23046875, + "y": 645.26171875, + "width": 263.3984375, + "height": 76.671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0G", + "roundness": { + "type": 3 + }, + "seed": 
160618182, + "version": 241, + "versionNonce": 507884954, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + } + ], + "updated": 1769212914995, + "link": null, + "locked": false + }, + { + "id": "otyemJxFJECPTvDVhd1hb", + "type": "text", + "x": 1153.78125, + "y": 666.8203125, + "width": 202.52484130859375, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 400406918, + "version": 296, + "versionNonce": 1094596075, + "isDeleted": false, + "boundElements": [], + "updated": 1769806431368, + "link": null, + "locked": false, + "text": "OVN-Kube DPU", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN-Kube DPU", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1chdpPBLx0VEf6sBwnyrT", + "type": "rectangle", + "x": 542.08203125, + "y": 300.78515625, + "width": 179.29296875, + "height": 75.30859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 1050229210, + "version": 33, + "versionNonce": 549854406, + "isDeleted": false, + "boundElements": [], + "updated": 1769211856008, + "link": null, + "locked": false + }, + { + "id": "E1qyG2mrQnzUtFZRnlzon", + "type": "text", + "x": 577.8984375, + "y": 320.80078125, + "width": 99.06443786621094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1111918810, + "version": 12, + "versionNonce": 888734571, + "isDeleted": false, + "boundElements": [], + "updated": 1769806292888, + "link": null, + "locked": false, + "text": "Kubelet", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubelet", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "YPI4K5hgJbgHj0hYUQy7U", + "type": "rectangle", + "x": 1124.60546875, + "y": 764.3125, + "width": 260.94921875, + "height": 99.75390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 2135829062, + "version": 407, + "versionNonce": 874908314, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + }, + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow" + } + ], + "updated": 1769213056152, + "link": null, + "locked": false + }, + { + "id": "xdskfV2GHpQr09QrrldPB", + "type": "text", + "x": 1223.94921875, + "y": 795.8984375, + "width": 55.74822998046875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 
1687121690, + "version": 302, + "versionNonce": 597504357, + "isDeleted": false, + "boundElements": [], + "updated": 1769806428054, + "link": null, + "locked": false, + "text": "OVN", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow", + "x": 1251.1328125, + "y": 721.33984375, + "width": 0.85546875, + "height": 43.734375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0M", + "roundness": { + "type": 2 + }, + "seed": 1411280134, + "version": 307, + "versionNonce": 1938263430, + "isDeleted": false, + "boundElements": [], + "updated": 1769212944324, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.85546875, + 43.734375 + ] + ], + "startBinding": { + "elementId": "3d5gOfYF1IUhTLFt9bgVo", + "mode": "inside", + "fixedPoint": [ + 0.48178852142963075, + 0.9922559608722233 + ] + }, + "endBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.48815172971273746, + 0.007635979167482476 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false, + "moveMidPointsWithElement": false + }, + { + "id": "nOHqcMRCCN5CvsIk9L7HK", + "type": "text", + "x": 798, + "y": 723.234375, + "width": 55.468231201171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 260809434, + "version": 338, + "versionNonce": 995951973, + "isDeleted": false, + "boundElements": [], + "updated": 1769806415498, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "lv5dwAOUTpgj7UyAdI9C8", + "type": "rectangle", + "x": 469.86556773558385, + "y": 612.6299835602476, + "width": 983.9485520288324, + "height": 291.42362662950524, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": { + "type": 3 + }, + "seed": 995117318, + "version": 1088, + "versionNonce": 1696375878, + "isDeleted": false, + "boundElements": [], + "updated": 1769213048339, + "link": null, + "locked": false + }, + { + "id": "nXwCdHqSV7QcCaC0Erx7X", + "type": "text", + "x": 498.4374982030654, + "y": 623.1721233376917, + "width": 61.824249267578125, + "height": 35, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 383758106, + "version": 10, + "versionNonce": 1631039019, + "isDeleted": false, + "boundElements": [], + "updated": 1769808323507, + "link": null, + "locked": false, + "text": "DPU", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + 
"verticalAlign": "top", + "containerId": null, + "originalText": "DPU", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "IfUjPJctzeOIeVT3m_NKp", + "type": "line", + "x": 745.578125, + "y": 567.96484375, + "width": 36.9140625, + "height": 104.56640625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0U", + "roundness": { + "type": 2 + }, + "seed": 1418297306, + "version": 72, + "versionNonce": 1158579290, + "isDeleted": false, + "boundElements": [], + "updated": 1769213000233, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 36.9140625, + 104.56640625 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "PrirCcAPzRM9oZsZiPQfu", + "type": "line", + "x": 923.26171875, + "y": 569.6640625, + "width": 61.421875, + "height": 101.24609375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0V", + "roundness": { + "type": 2 + }, + "seed": 424258758, + "version": 73, + "versionNonce": 882050117, + "isDeleted": false, + "boundElements": [], + "updated": 1769806405260, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -61.421875, + 101.24609375 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "p81TteZyn3j3h51DDmQTH", + "type": "line", + "x": 825.7578125, + "y": 815.96484375, + "width": 1.26953125, + "height": 64.45703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0W", + "roundness": { + "type": 2 + }, + "seed": 1162031450, + "version": 132, + "versionNonce": 949266138, + "isDeleted": false, + "boundElements": [], + "updated": 1769213029964, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 1.26953125, + 64.45703125 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "5tWnGTpPXbqm73Qbq0o0q", + "type": "rectangle", + "x": 781.54296875, + "y": 882.6015625, + "width": 81.39453125, + "height": 45, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0X", + "roundness": { + "type": 3 + }, + "seed": 33982854, + "version": 71, + "versionNonce": 1656813829, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "sTQv7IwvRCIrTkGyDXdOe" + } + ], + "updated": 1769806419424, + "link": null, + "locked": false + }, + { + "id": "sTQv7IwvRCIrTkGyDXdOe", + "type": "text", + "x": 789.7461013793945, + "y": 887.6015625, + "width": 64.98826599121094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Y", + "roundness": null, + "seed": 515413210, + 
"version": 9, + "versionNonce": 1755680869, + "isDeleted": false, + "boundElements": [], + "updated": 1769806419425, + "link": null, + "locked": false, + "text": "eth0", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "5tWnGTpPXbqm73Qbq0o0q", + "originalText": "eth0", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow", + "x": 1125.1015625, + "y": 813.7265625, + "width": 181.09375, + "height": 54.76953125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Z", + "roundness": { + "type": 2 + }, + "seed": 991405146, + "version": 229, + "versionNonce": 1472329882, + "isDeleted": false, + "boundElements": [], + "updated": 1769213071066, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -181.09375, + -54.76953125 + ] + ], + "startBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.0019011122254988548, + 0.4953596741982222 + ] + }, + "endBinding": { + "elementId": "tYzSvRmwfQQNkfrVTe1P5", + "mode": "inside", + "fixedPoint": [ + 0.9922190632949274, + 0.6092526883176684 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "MqPB_VDtU5rMycT-a935q", + "type": "rectangle", + "x": 1081.32421875, + "y": 447.765625, + "width": 315.91796875, + "height": 93.0859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0a", + "roundness": { + "type": 3 + }, + "seed": 1303795158, + "version": 444, + "versionNonce": 1847224491, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7u9OSdgdx8ADuPRU4aG-I" + } + ], + "updated": 1769806402993, + "link": null, + "locked": false + }, + { + "id": "7u9OSdgdx8ADuPRU4aG-I", + "type": "text", + "x": 1100.5006408691406, + "y": 476.80859375, + "width": 277.56512451171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0b", + "roundness": null, + "seed": 1295666890, + "version": 92, + "versionNonce": 1404967755, + "isDeleted": false, + "boundElements": [], + "updated": 1769806402993, + "link": null, + "locked": false, + "text": "OVN-Kube DPU-Host", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MqPB_VDtU5rMycT-a935q", + "originalText": "OVN-Kube DPU-Host", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "uA4KWyM_Ie7myxT2uvmCP", + "type": "diamond", + "x": 829.755859375, + "y": 412.166015625, + "width": 190.07812499999997, + "height": 161.12109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0c", + "roundness": { + "type": 2 + }, + "seed": 1507403301, + "version": 1119, + "versionNonce": 445224427, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "xVvf92bga4Yjg2V49SLlB" + } + ], + "updated": 1769806392104, 
+ "link": null, + "locked": false + }, + { + "id": "xVvf92bga4Yjg2V49SLlB", + "type": "text", + "x": 882.2992172241211, + "y": 475.4462890625, + "width": 84.95234680175781, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0d", + "roundness": null, + "seed": 1652234629, + "version": 1040, + "versionNonce": 475373707, + "isDeleted": false, + "boundElements": [], + "updated": 1769806392104, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "uA4KWyM_Ie7myxT2uvmCP", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "onVWXBwWSHPutTw8Ddqy2", + "type": "diamond", + "x": 646, + "y": 413.212890625, + "width": 190.07812499999997, + "height": 161.12109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0e", + "roundness": { + "type": 2 + }, + "seed": 183245483, + "version": 1130, + "versionNonce": 1930976357, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "qg1fbVwEByLpag6WgrzZj" + } + ], + "updated": 1769806393479, + "link": null, + "locked": false + }, + { + "id": "qg1fbVwEByLpag6WgrzZj", + "type": "text", + "x": 699.7333602905273, + "y": 476.4931640625, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0f", + "roundness": null, + "seed": 260672843, + "version": 1052, + "versionNonce": 95689349, + "isDeleted": false, + "boundElements": [], + "updated": 1769806395323, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "onVWXBwWSHPutTw8Ddqy2", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "19Uo1FixsTScuIkccHk0h", + "type": "text", + "x": 492.79887739537054, + "y": 549.5296776267693, + "width": 63.532257080078125, + "height": 35, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0g", + "roundness": null, + "seed": 1559443979, + "version": 28, + "versionNonce": 492903403, + "isDeleted": false, + "boundElements": [], + "updated": 1769808327521, + "link": null, + "locked": false, + "text": "Host", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Host", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff", + "lockedMultiSelections": {} + }, + "files": {} +} \ No newline at end of file diff --git a/docs/images/ovnk-accelerated.svg b/docs/images/ovnk-accelerated.svg new file mode 100644 index 0000000000..5d65dccc9d --- /dev/null +++ b/docs/images/ovnk-accelerated.svg @@ -0,0 +1,4 @@ + + 
+Kubernetes Worker NodeOVN-Kube DPUKubeletOVNOVSDPUeth0OVN-Kube DPU-HostPod BPod AHost \ No newline at end of file diff --git a/docs/images/ovnk-unaccelerated.excalidraw b/docs/images/ovnk-unaccelerated.excalidraw new file mode 100644 index 0000000000..d373705c25 --- /dev/null +++ b/docs/images/ovnk-unaccelerated.excalidraw @@ -0,0 +1,2131 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "3PSV2IpmimdHV3TGHwCnL", + "type": "rectangle", + "x": 467.4296875, + "y": 221.05078125, + "width": 1031.4375, + "height": 575.5703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 3 + }, + "seed": 1323383732, + "version": 437, + "versionNonce": 1307439322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211652514, + "link": null, + "locked": false + }, + { + "id": "Hhj9Yei2KIDBGtqDrnLuE", + "type": "text", + "x": 494.56640624999994, + "y": 233.42578125, + "width": 371.5190825257982, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 234782220, + "version": 215, + "versionNonce": 956850501, + "isDeleted": false, + "boundElements": [], + "updated": 1769807070243, + "link": null, + "locked": false, + "text": "Kubernetes Worker Node", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubernetes Worker Node", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "AFBhMx_FMLkpvpoVM55si", + "type": "diamond", + "x": 743.5546875, + "y": 445.03125, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": { + "type": 2 + }, + "seed": 1109773108, + "version": 903, + "versionNonce": 1838605157, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "d21SDy11vytpGym8ZRv9Z" + } + ], + "updated": 1769807111952, + "link": null, + "locked": false + }, + { + "id": "d21SDy11vytpGym8ZRv9Z", + "type": "text", + "x": 798.0214462280273, + "y": 515.4755859375, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 265828788, + "version": 853, + "versionNonce": 1242508997, + "isDeleted": false, + "boundElements": [], + "updated": 1769807111952, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "AFBhMx_FMLkpvpoVM55si", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MMRCgxZ2SkPQRTbazh5Mt", + "type": "diamond", + "x": 775.771484375, + "y": 637.09375, + "width": 167.17578125000003, + "height": 123.34765625000003, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": { + "type": 2 + }, + "seed": 1941094156, + "version": 1015, + "versionNonce": 1658634021, + "isDeleted": true, + "boundElements": [ + { + "type": "text", + "id": "fBzHO81d67o5rHJ7nhi37" + } + ], + "updated": 1769807106331, + "link": null, + "locked": false + }, + { + "id": "fBzHO81d67o5rHJ7nhi37", + "type": "text", + "x": 829.225456237793, + "y": 686.4306640625, + "width": 60.67994689941406, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 262077836, + "version": 972, + "versionNonce": 1437462955, + "isDeleted": true, + "boundElements": [], + "updated": 1769807106331, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MMRCgxZ2SkPQRTbazh5Mt", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "o-BS3wjgvpBdXfEGxS4zJ", + "type": "rectangle", + "x": 653.71484375, + "y": 517.03515625, + "width": 135.08984375, + "height": 63.8671875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJ", + "roundness": { + "type": 3 + }, + "seed": 1828499764, + "version": 508, + "versionNonce": 1836541978, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "CI86pCLaDcJplMW-JEioJ", + "type": "text", + "x": 675.0098114013672, + "y": 523.96875, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJV", + "roundness": null, + "seed": 583086900, + "version": 473, + "versionNonce": 1445346950, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVN-Kube\nK2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "o-BS3wjgvpBdXfEGxS4zJ", + "originalText": "OVN-Kube\nK2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "uXVXhzqnTRn3VJnZ7vo3K", + "type": "rectangle", + "x": 837.640625, + "y": 549.546875, + "width": 342.1953125, + "height": 172.0546875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aO", + "roundness": { + "type": 3 + }, + "seed": 1521260428, + "version": 984, + "versionNonce": 893867226, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "EMcQCTn12gbouIGsMbCWR", + "type": "text", + "x": 855, + "y": 565.53515625, + "width": 39.619964599609375, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": 
"solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aQ", + "roundness": null, + "seed": 322143028, + "version": 709, + "versionNonce": 623410630, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "FGWbnjuJMPPm2--6Clo68", + "type": "rectangle", + "x": 1209.94921875, + "y": 505.71875, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aS", + "roundness": { + "type": 3 + }, + "seed": 581768628, + "version": 738, + "versionNonce": 2113644954, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "MHVGjmp21WFrjCeVuw6_M", + "type": "text", + "x": 1232.8403396606445, + "y": 533.654296875, + "width": 58.95994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aT", + "roundness": null, + "seed": 962236812, + "version": 713, + "versionNonce": 807458054, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 FW", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "FGWbnjuJMPPm2--6Clo68", + "originalText": "L7 FW", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "par5rlqYTD0bht0WGS4oz", + "type": "rectangle", + "x": 1218.1171875, + "y": 636.783203125, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aU", + "roundness": { + "type": 3 + }, + "seed": 246895884, + "version": 723, + "versionNonce": 2126997082, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "VJCFsIBfciMT2Hg4opZrX", + "type": "text", + "x": 1242.4383087158203, + "y": 664.71875, + "width": 56.099945068359375, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aV", + "roundness": null, + "seed": 1235276684, + "version": 724, + "versionNonce": 1082934342, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 LB", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "par5rlqYTD0bht0WGS4oz", + "originalText": "L7 LB", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ZHSHwJ-WMxVntYGdHzqAS", + "type": "rectangle", + "x": 875.52734375, + "y": 612.0859375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aW", + "roundness": { + "type": 3 + }, + "seed": 436416012, + "version": 786, + "versionNonce": 276663066, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "yhhtdkwmarFaKUI6H9eVm", + "type": "text", + "x": 893.5230178833008, + "y": 621.005859375, + "width": 95.03990173339844, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aWV", + "roundness": null, + "seed": 1538533684, + "version": 713, + "versionNonce": 1749535622, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "worker LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ZHSHwJ-WMxVntYGdHzqAS", + "originalText": "worker LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "bIAAQozA4blO6kgvckP3S", + "type": "rectangle", + "x": 1028.86328125, + "y": 611.146484375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ab", + "roundness": { + "type": 3 + }, + "seed": 330947340, + "version": 845, + "versionNonce": 2039551962, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "O88cyEH5yk5c4puqvqvCF", + "type": "text", + "x": 1059.6089401245117, + "y": 620.06640625, + "width": 69.53993225097656, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ac", + "roundness": null, + "seed": 1482856844, + "version": 776, + "versionNonce": 1972398790, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "SFC LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bIAAQozA4blO6kgvckP3S", + "originalText": "SFC LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "pVasypJX2u2AP43S1rJ-H", + "type": "rectangle", + "x": 480.009765625, + "y": 450.455078125, + "width": 891.2070312499999, + "height": 289.640625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ah", + "roundness": { + "type": 3 + }, + "seed": 1392129420, + "version": 528, + "versionNonce": 62172442, + "isDeleted": true, + "boundElements": [], + "updated": 1769211638753, + "link": null, + "locked": false + }, + { + "id": "oA5KoMQjshRzJUNQcjaiC", + "type": "text", + "x": 500.03125, + "y": 461.015625, + "width": 333.6326293945313, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + 
"roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aj", + "roundness": null, + "seed": 1841999628, + "version": 121, + "versionNonce": 974458010, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "DPU - K2 Kubernetes Cluster", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "DPU - K2 Kubernetes Cluster", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "DXSgUtwjW1M5OtOfM2oOj", + "type": "text", + "x": 897.46875, + "y": 400.98828125, + "width": 35.91996765136719, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aq", + "roundness": null, + "seed": 1235853748, + "version": 93, + "versionNonce": 865510426, + "isDeleted": true, + "boundElements": [], + "updated": 1769211693537, + "link": null, + "locked": false, + "text": "VFs", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VFs", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "jTuqSQnd1xqE5jzyNgo_V", + "type": "text", + "x": 922.2275161743164, + "y": 553.3203125, + "width": 80.11993408203125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ar", + "roundness": null, + "seed": 257701772, + "version": 685, + "versionNonce": 1728258566, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "VF Reps", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VF Reps", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "kUpychMCsIeTRZMalnMDE", + "type": "rectangle", + "x": 504.18359375, + "y": 517.0546875, + "width": 140.1484375, + "height": 61.43359375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "as", + "roundness": { + "type": 3 + }, + "seed": 250474741, + "version": 420, + "versionNonce": 1471965530, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "KypHgViW6BpvPS6NP1wXN", + "type": "text", + "x": 528.0078582763672, + "y": 522.771484375, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "at", + "roundness": null, + "seed": 1179519573, + "version": 388, + "versionNonce": 1604120902, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVN-Kube\nK1", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "kUpychMCsIeTRZMalnMDE", + "originalText": "OVN-Kube\nK1", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"R8DOjTmolTEKd9pKdGNyl", + "type": "rectangle", + "x": 563.0703125, + "y": 608.494140625, + "width": 176.24218750000003, + "height": 104.79296875000006, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "au", + "roundness": { + "type": 3 + }, + "seed": 2033483131, + "version": 692, + "versionNonce": 702164506, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "1A3hQ4xYut0uu_3QxzAE5", + "type": "text", + "x": 594.2914505004883, + "y": 635.890625, + "width": 113.79991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "av", + "roundness": null, + "seed": 1918703131, + "version": 671, + "versionNonce": 448452742, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "Shared OVN\nK1 and K2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "R8DOjTmolTEKd9pKdGNyl", + "originalText": "Shared OVN\nK1 and K2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "3kqb-K8O9f08ArUDv9Cjf", + "type": "line", + "x": 821.24609375, + "y": 401.33984375, + "width": 113.5859375, + "height": 209.3515625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aw", + "roundness": { + "type": 2 + }, + "seed": 2063502261, + "version": 139, + "versionNonce": 1596119770, + "isDeleted": true, + "boundElements": [], + "updated": 1769211662686, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 113.5859375, + 209.3515625 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "4QSR0lJotZNZzQEMYnuKY", + "type": "line", + "x": 1010.19140625, + "y": 400.0546875, + "width": 21.296875, + "height": 211.91796875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ax", + "roundness": { + "type": 2 + }, + "seed": 1579370107, + "version": 196, + "versionNonce": 1847422726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211665400, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.296875, + 211.91796875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "5_Xt3FUxVeazyDS7r-Ucr", + "type": "arrow", + "x": 595.2326807094685, + "y": 579.48828125, + "width": 14.427475540531532, + "height": 25.95703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b02", + "roundness": { + "type": 2 + }, + "seed": 1308316949, + "version": 81, 
+ "versionNonce": 627137242, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 14.427475540531532, + 25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "kUpychMCsIeTRZMalnMDE", + "mode": "orbit", + "fixedPoint": [ + 0.5315652620380965, + 0.531565262038097 + ] + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "QqSmIOXcdN0n9q_f3P72z", + "type": "arrow", + "x": 744.84765625, + "y": 648.58203125, + "width": 124.9464277396745, + "height": 20.664800962877393, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b03", + "roundness": { + "type": 2 + }, + "seed": 2041478555, + "version": 384, + "versionNonce": 1612306374, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 124.9464277396745, + -20.664800962877393 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.5237502641723047, + 0.5237502641723036 + ] + }, + "endBinding": { + "elementId": "ZHSHwJ-WMxVntYGdHzqAS", + "mode": "orbit", + "fixedPoint": [ + 0.23070594032628622, + 0.23070594032628763 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "Xo9luBmfagxGHywoTzuWJ", + "type": "line", + "x": 1090.875, + "y": 612.74609375, + "width": 118.109375, + "height": 61.48046875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b06", + "roundness": { + "type": 2 + }, + "seed": 1795601781, + "version": 77, + "versionNonce": 686092186, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 118.109375, + -61.48046875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "FrZUA_nXF7jrENeAm_iuN", + "type": "line", + "x": 1122.578125, + "y": 611.7421875, + "width": 87.8984375, + "height": 48.03125, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b07", + "roundness": { + "type": 2 + }, + "seed": 140615867, + "version": 65, + "versionNonce": 2010574598, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 87.8984375, + -48.03125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "h0gFSUv-MCabHAwjdzstW", + "type": "line", + "x": 1140.62890625, + "y": 655.7109375, + "width": 76.375, + "height": 37.7421875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + 
"groupIds": [], + "frameId": null, + "index": "b08", + "roundness": { + "type": 2 + }, + "seed": 735261141, + "version": 47, + "versionNonce": 1217915994, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 76.375, + 37.7421875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "uyp2yTlDgYoUCgRf1t2Mw", + "type": "line", + "x": 1160.61328125, + "y": 646.52734375, + "width": 57.1875, + "height": 25.95703125, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b09", + "roundness": { + "type": 2 + }, + "seed": 209223931, + "version": 31, + "versionNonce": 510598726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 57.1875, + 25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "8RZ9vzUTGLaqkOXekPLDQ", + "type": "arrow", + "x": 747.41015625, + "y": 697.359375, + "width": 286.4296875, + "height": 37.015625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0A", + "roundness": { + "type": 2 + }, + "seed": 1520074645, + "version": 64, + "versionNonce": 441274650, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 286.4296875, + -37.015625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.8833472697193855, + 0.8833472697193837 + ] + }, + "endBinding": { + "elementId": "bIAAQozA4blO6kgvckP3S", + "mode": "orbit", + "fixedPoint": [ + 0.8338256639609369, + 0.8338256639609338 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "5hOE-WQdQ6EGLVGOoP1I2", + "type": "arrow", + "x": 717.5696974855308, + "y": 581.90234375, + "width": 21.679994111348492, + "height": 25.3359375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0B", + "roundness": { + "type": 2 + }, + "seed": 1498907035, + "version": 49, + "versionNonce": 2025809286, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.679994111348492, + 25.3359375 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "o-BS3wjgvpBdXfEGxS4zJ", + "mode": "orbit", + "fixedPoint": [ + 0.6290772163270132, + 0.6290772163270139 + ] + }, + "endBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.4954418212421667, + 0.49544182124216624 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "tYzSvRmwfQQNkfrVTe1P5", + "type": "rectangle", + "x": 992.984375, + "y": 572.734375, + 
"width": 234.94921875, + "height": 144.21484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": { + "type": 3 + }, + "seed": 198185882, + "version": 375, + "versionNonce": 411814042, + "isDeleted": false, + "boundElements": [ + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211901272, + "link": null, + "locked": false + }, + { + "id": "lYB2Gm5YLXtUdQ0eGqYgo", + "type": "freedraw", + "x": 1208.921875, + "y": 367.125, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0D", + "roundness": null, + "seed": 901415366, + "version": 4, + "versionNonce": 110577818, + "isDeleted": true, + "boundElements": [], + "updated": 1769211701877, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true + }, + { + "id": "tsRvMJO_5sEYyzqg0Hh7c", + "type": "line", + "x": 929.01171875, + "y": 535.390625, + "width": 65.09375, + "height": 64.51171875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0E", + "roundness": { + "type": 2 + }, + "seed": 1228634182, + "version": 113, + "versionNonce": 628743877, + "isDeleted": false, + "boundElements": [], + "updated": 1769807129093, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 65.09375, + 64.51171875 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "oACgbs4cMqbhu8HtPebFx", + "type": "line", + "x": 932.6796875, + "y": 709.16015625, + "width": 61.11328125, + "height": 36.8828125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0F", + "roundness": { + "type": 2 + }, + "seed": 983491994, + "version": 68, + "versionNonce": 1634049963, + "isDeleted": false, + "boundElements": [], + "updated": 1769807122256, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 61.11328125, + -36.8828125 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "3d5gOfYF1IUhTLFt9bgVo", + "type": "rectangle", + "x": 981.53125, + "y": 261.81640625, + "width": 263.3984375, + "height": 76.671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0G", + "roundness": { + "type": 3 + }, + "seed": 160618182, + "version": 76, + "versionNonce": 374670022, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + } + ], + "updated": 1769211893845, + "link": null, + "locked": false + }, + { + "id": "otyemJxFJECPTvDVhd1hb", + "type": "text", + "x": 1047.35546875, + 
"y": 283.36328125, + "width": 129.50054931640625, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 400406918, + "version": 91, + "versionNonce": 1488222219, + "isDeleted": false, + "boundElements": [], + "updated": 1769807077202, + "link": null, + "locked": false, + "text": "OVN-Kube", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN-Kube", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1chdpPBLx0VEf6sBwnyrT", + "type": "rectangle", + "x": 542.08203125, + "y": 300.78515625, + "width": 179.29296875, + "height": 75.30859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 1050229210, + "version": 33, + "versionNonce": 549854406, + "isDeleted": false, + "boundElements": [], + "updated": 1769211856008, + "link": null, + "locked": false + }, + { + "id": "E1qyG2mrQnzUtFZRnlzon", + "type": "text", + "x": 577.8984375, + "y": 320.80078125, + "width": 99.06443786621094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1111918810, + "version": 12, + "versionNonce": 1083119301, + "isDeleted": false, + "boundElements": [], + "updated": 1769807072474, + "link": null, + "locked": false, + "text": "Kubelet", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubelet", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "YPI4K5hgJbgHj0hYUQy7U", + "type": "rectangle", + "x": 983.12890625, + "y": 410.5234375, + "width": 260.94921875, + "height": 99.75390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 2135829062, + "version": 161, + "versionNonce": 1398047066, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211899581, + "link": null, + "locked": false + }, + { + "id": "xdskfV2GHpQr09QrrldPB", + "type": "text", + "x": 1081.9375, + "y": 441.5078125, + "width": 55.74822998046875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 1687121690, + "version": 128, + "versionNonce": 1673910021, + "isDeleted": false, + "boundElements": [], + "updated": 1769807080854, + "link": null, + "locked": false, + "text": "OVN", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": 
"OVN", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow", + "x": 1108.43359375, + "y": 337.89453125, + "width": 2.078125, + "height": 73.390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0M", + "roundness": { + "type": 2 + }, + "seed": 1411280134, + "version": 63, + "versionNonce": 800499334, + "isDeleted": false, + "boundElements": [], + "updated": 1769211896239, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2.078125, + 73.390625 + ] + ], + "startBinding": { + "elementId": "3d5gOfYF1IUhTLFt9bgVo", + "mode": "inside", + "fixedPoint": [ + 0.48178852142963075, + 0.9922559608722233 + ] + }, + "endBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.48815172971273746, + 0.007635979167482476 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow", + "x": 1112.98828125, + "y": 508.2265625, + "width": 0.453125, + "height": 66.83984375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0N", + "roundness": { + "type": 2 + }, + "seed": 1453237018, + "version": 64, + "versionNonce": 374911322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211905520, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -0.453125, + 66.83984375 + ] + ], + "startBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.4976423214526294, + 0.9794415945490856 + ] + }, + "endBinding": { + "elementId": "tYzSvRmwfQQNkfrVTe1P5", + "mode": "inside", + "fixedPoint": [ + 0.5088366834588591, + 0.016170535496627753 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "nOHqcMRCCN5CvsIk9L7HK", + "type": "text", + "x": 1083.96875, + "y": 624.0546875, + "width": 55.468231201171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 260809434, + "version": 68, + "versionNonce": 1104957285, + "isDeleted": false, + "boundElements": [], + "updated": 1769807084822, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "4IG9MXRe5ijYB-oIc24C6", + "type": "rectangle", + "x": 1067.27734375, + "y": 782.578125, + "width": 97.47265625, + "height": 31.69921875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0P", + "roundness": { + "type": 3 + }, + "seed": 1419364058, + "version": 56, + "versionNonce": 1950880902, + "isDeleted": false, + "boundElements": [], + "updated": 1769211952988, + "link": null, + "locked": false + }, + { + "id": "nIejWKUhYhhAgOj88nUB7", + 
"type": "text", + "x": 1086.921875, + "y": 786.23828125, + "width": 64.98826599121094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Q", + "roundness": null, + "seed": 1034482586, + "version": 33, + "versionNonce": 1547842859, + "isDeleted": false, + "boundElements": [], + "updated": 1769807134429, + "link": null, + "locked": false, + "text": "eth0", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "eth0", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xkJunDpSamP7rfEtN7oH1", + "type": "line", + "x": 1115.1484375, + "y": 783.14453125, + "width": 1.234375, + "height": 65.4609375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0R", + "roundness": { + "type": 2 + }, + "seed": 1791101850, + "version": 51, + "versionNonce": 500083802, + "isDeleted": false, + "boundElements": [], + "updated": 1769211973078, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1.234375, + -65.4609375 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "lzux7Pz5JYywiZFFYn3c9", + "type": "diamond", + "x": 746.962890625, + "y": 619.037109375, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": { + "type": 2 + }, + "seed": 1138066597, + "version": 917, + "versionNonce": 364881579, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ZIHVxeeORZWrsAYttnEg3" + } + ], + "updated": 1769807115636, + "link": null, + "locked": false + }, + { + "id": "ZIHVxeeORZWrsAYttnEg3", + "type": "text", + "x": 800.2396469116211, + "y": 689.4814453125, + "width": 84.95234680175781, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 979871749, + "version": 870, + "versionNonce": 1945999973, + "isDeleted": false, + "boundElements": [], + "updated": 1769807118450, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "lzux7Pz5JYywiZFFYn3c9", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff", + "lockedMultiSelections": {} + }, + "files": {} +} \ No newline at end of file diff --git a/docs/images/ovnk-unaccelerated.svg b/docs/images/ovnk-unaccelerated.svg new file mode 100644 index 0000000000..a2bb8222cf --- /dev/null +++ b/docs/images/ovnk-unaccelerated.svg @@ -0,0 +1,4 @@ + + +Kubernetes Worker NodePod AOVN-KubeKubeletOVNOVSeth0Pod B \ No newline at end of file diff --git 
a/docs/installation/launching-ovn-kubernetes-with-dpu.md b/docs/installation/launching-ovn-kubernetes-with-dpu.md new file mode 100644 index 0000000000..b7ad6c3613 --- /dev/null +++ b/docs/installation/launching-ovn-kubernetes-with-dpu.md @@ -0,0 +1,189 @@
+# Launching OVN-Kubernetes in a DPU-Accelerated environment in interconnect mode
+
+## OVN K8s cluster setup
+
+OVN K8s CNI in a DPU-Accelerated environment is deployed using two Kubernetes clusters, one for the hosts and the other for the DPUs.
+
+DPUs in the DPU cluster will watch the DPU Host cluster for K8s resources such as Pods, Namespaces, NetworkAttachmentDefinitions, Services, and Endpoints, and act on updates to those resources. Hence, they require credentials to access the DPU Host cluster. Each DPU will have a setting denoting the DPU Host to which it is associated.
+
+Refer to [DPU support](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/dpu-support.md) for more details on the setup.
+
+## SR-IOV settings on DPU Host
+
+Follow [OVS Acceleration with Kernel datapath](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/ovs-kernel.md) or [OVS Acceleration with DOCA datapath](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/ovs-doca.md) to enable the Open vSwitch hardware offloading feature on DPU hosts.
+
+A single VF net-device or a group of VF net-devices (configured as an SR-IOV device plugin resource pool) needs to be set up separately to create the management port(s).
+
+## K8s Settings on DPU Host
+
+The following node labels must be set on the DPU Host prior to installing OVN K8s CNI:
+
+```yaml
+k8s.ovn.org/dpu-host=
+k8s.ovn.org/zone-name="dpu-host node name"
+```
+
+## Launching OVN K8s DPU Host cluster using helm
+OVN K8s CNI can be deployed using the helm charts provided under [OVN K8s Helm Charts](https://github.com/ovn-kubernetes/ovn-kubernetes/tree/master/helm/ovn-kubernetes). Refer to [Launching OVN-Kubernetes using Helm Charts](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/installation/launching-ovn-kubernetes-with-helm.md) for general instructions on using the helm charts and an explanation of the common values used in the various subcharts.
+
+For the DPU Host cluster, use values-single-node-zone.yaml and set the following fields as specified. The other fields in the file can be set as needed.
+
+```yaml
+tags:
+  ovnkube-node-dpu-host: true # Removing this line will also enable applying the ovnkube-node-dpu-host subchart
+  ovs-node: false # Disable the ovs-node subchart, as OVS is already provided by the corresponding DPU
+global:
+  enableOvnKubeIdentity: false # This feature is currently not supported for clusters with DPUs/DPU Hosts
+```
+
+The ovn-kubernetes image to be used in the containers should be provided in the image section:
+```yaml
+global:
+  image:
+    repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora
+    tag: master
+```
+
+Management port netdevice information should be provided in the values.yaml file under helm/ovn-kubernetes/charts/ovnkube-node-dpu-host. For example:
+```yaml
+nodeMgmtPortNetdev: "enp1s0f0v0" # Single VF net-device to be used for the management port, or
+mgmtPortVFResourceName: "mgmtport_vfs" # SR-IOV device plugin resource pool from which VF net-device(s) can be selected.
+mgmtPortVFsCount: 2 # If using UDNs, the number of VFs required to handle management ports; this depends on the number of primary UDNs needed.
+```
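+
+If the VF net-device name is not known up front, it can usually be read from sysfs. The snippet below is a minimal sketch, assuming the PF from the example above is enp1s0f0 and that the VFs have already been created; the exact paths may differ per NIC.
+```
+# Show the netdev name of the first VF (virtfn0) under the PF enp1s0f0.
+ls /sys/class/net/enp1s0f0/device/virtfn0/net
+# Example output: enp1s0f0v0, the value to use for nodeMgmtPortNetdev
+```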
+
+mgmtPortVFResourceName will be prioritized over nodeMgmtPortNetdev if both are specified.
+If using UDNs, mgmtPortVFResourceName and mgmtPortVFsCount should be specified.
+
+Launch OVN K8s using:
+```
+helm install ovn-kubernetes . -f values-single-node-zone.yaml
+```
+
+## Generating credentials for accessing this cluster from DPU
+
+After deploying the CNI, create a secret in this cluster for the service account ovnkube-node by applying the following manifest:
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: ovnkube-node-sa-for-dpu
+  namespace: ovn-kubernetes
+  annotations:
+    kubernetes.io/service-account.name: ovnkube-node
+type: kubernetes.io/service-account-token
+```
+
+Get the values of ca.crt and token, which will be used in the DPU cluster. The token should be base64-decoded, but the encoded ca.crt should be used as is.
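+
+For example, the two values can be read from the secret as below (a sketch; the secret name and namespace are taken from the manifest above):
+```
+# token is stored base64-encoded in the secret, so decode it before use.
+kubectl -n ovn-kubernetes get secret ovnkube-node-sa-for-dpu -o jsonpath='{.data.token}' | base64 -d
+# ca.crt should be kept in its base64-encoded form.
+kubectl -n ovn-kubernetes get secret ovnkube-node-sa-for-dpu -o jsonpath='{.data.ca\.crt}'
+```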
+
+## K8s Settings on DPU
+
+The following node label is required on DPUs prior to installing OVN K8s CNI:
+```yaml
+k8s.ovn.org/dpu=
+```
+
+## OVS settings on DPU
+Some OVS settings are required on the DPU to enable hardware offloads, connect to the right DPU Host in the DPU Host cluster, and correctly steer traffic flows.
+
+Consider an example with the OVS bridge configuration on the DPU and the network settings on the DPU and DPU Host as below.
+
+```
+ovs-vsctl show
+    Bridge brp0
+        fail_mode: standalone
+        Port pf0hpf
+            tag: 3
+            Interface pf0hpf
+                type: system
+        Port p0
+            Interface p0
+                type: system
+        Port vtep0
+            tag: 2
+            Interface vtep0
+                type: internal
+        Port brp0
+            Interface brp0
+                type: internal
+```
+
+```
+$ ip addr show dev brp0
+4: brp0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+    link/ether 52:54:00:a1:b2:c3 brd ff:ff:ff:ff:ff:ff
+    inet 192.0.2.10/24 brd 192.0.2.255 scope global brp0
+       valid_lft forever preferred_lft forever
+
+$ ip addr show dev vtep0
+5: vtep0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UNKNOWN group default qlen 1000
+    link/ether 52:54:00:d4:e5:f6 brd ff:ff:ff:ff:ff:ff
+    inet 198.51.100.10/24 brd 198.51.100.255 scope global vtep0
+       valid_lft forever preferred_lft forever
+```
+
+On the DPU Host with node name dpu-host, the IP address is set as below.
+
+```
+$ ip addr show dev enp1s0f0
+2: enp1s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
+    link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
+    inet 203.0.113.10/24 brd 203.0.113.255 scope global enp1s0f0
+       valid_lft forever preferred_lft forever
+
+$ ip route show default
+default via 203.0.113.1 dev enp1s0f0 proto static
+```
+
+The router subnet is 203.0.113.0/24.
+
+The required OVS settings are as below. The values provided are taken from the above example.
+
+```
+other_config:hw-offload=true - enable hardware offloading
+external_ids:host-k8s-nodename="dpu-host" - name of the DPU Host node
+external_ids:hostname="dpu" - OVN chassis hostname of the DPU
+external_ids:ovn-encap-ip="198.51.100.10" - encapsulation IP of the DPU
+external_ids:ovn-encap-type="geneve" - supported encapsulation type
+external_ids:ovn-gw-interface="brp0" - interface on the DPU that serves as the gateway interface
+external_ids:ovn-gw-nexthop="203.0.113.1" - default gateway address for the DPU Host network
+external_ids:ovn-gw-router-subnet="203.0.113.0/24" - subnet to be used for the gateway router if the DPU is in a different subnet than the DPU Host network
+external_ids:ovn-gw-vlanid="3" - optional setting if the VLAN id of the gateway is not the native VLAN
+```
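+
+For reference, these settings can be applied with ovs-vsctl as sketched below, using the example values; depending on the environment, OVS and ovn-controller may need to be restarted for some of them to take effect.
+```
+ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
+ovs-vsctl set Open_vSwitch . \
+    external_ids:host-k8s-nodename="dpu-host" \
+    external_ids:hostname="dpu" \
+    external_ids:ovn-encap-ip="198.51.100.10" \
+    external_ids:ovn-encap-type="geneve" \
+    external_ids:ovn-gw-interface="brp0" \
+    external_ids:ovn-gw-nexthop="203.0.113.1" \
+    external_ids:ovn-gw-router-subnet="203.0.113.0/24" \
+    external_ids:ovn-gw-vlanid="3"
+```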
+
+## Launching OVN K8s DPU cluster
+
+Once the DPU Host cluster is deployed, the credentials to access that cluster are needed for the DPU cluster deployment. It also requires additional information regarding the OVN K8s configuration.
+
+Use values-single-node-zone-dpu.yaml for deploying the DPU cluster. Only the ovnkube-single-node-zone-dpu chart has to be installed, and it is enabled by default. The rest of the charts are disabled by setting them to false under the tags section; this should not be changed.
+
+Set the following field as specified.
+```yaml
+global:
+  enableOvnKubeIdentity: false # This feature is currently not supported for clusters with DPUs/DPU Hosts
+```
+
+The following DPU Host cluster related information must be provided.
+```yaml
+global:
+  dpuHostClusterK8sAPIServer: "https://172.25.0.2:6443" # Endpoint of the DPU Host cluster's K8s API server
+  dpuHostClusterK8sToken: "" # DPU Host cluster's K8s access token, base64-decoded
+  dpuHostClusterK8sCACertData: "" # DPU Host cluster's base64-encoded K8s CA certificate data
+  dpuHostClusterNetworkCIDR: "10.244.0.0/16/24" # DPU Host cluster's network CIDR
+  dpuHostClusterServiceCIDR: "10.96.0.0/16" # DPU Host cluster's service CIDR
+  mtu: "1400" # MTU of the network interface in a K8s pod
+```
+
+The ovn-kubernetes image to be used in the containers should be provided in the dpuImage section. It must be built for the arm64 architecture.
+```yaml
+global:
+  dpuImage:
+    repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu
+    tag: master
+```
+
+The rest of the fields can be set as needed.
+
+Launch OVN K8s using:
+```
+helm install ovn-kubernetes . -f values-single-node-zone-dpu.yaml
+```
diff --git a/docs/observability/ovn-observability.md b/docs/observability/ovn-observability.md
index 5810fea58a..b79cfa6e39 100644
--- a/docs/observability/ovn-observability.md
+++ b/docs/observability/ovn-observability.md
@@ -7,6 +7,7 @@ specific OVS flows are matched. To see the generated samples, a binary called `o
 This binary allows printing the samples to stdout or writing them to a file.
 Currently, supports observability for:
+
 - Network Policy
 - (Baseline) Admin Network Policy
 - Egress firewall
@@ -37,28 +38,44 @@ insights of what ovn-kubernetes is doing with a packet and why.
 
 To enable this feature, use `--observability` flag with `kind.sh` script or `--enable-observability` flag with `ovnkube` binary.
 
-To see the samples, use `ovnkube-observ` binary, use `-h` to see allowed flags.
+To see the samples, use the `ovnkube-observ` binary, with `-h` to see the allowed flags. `ovnkube-observ` is installed on the ovnkube pods. For example:
 
-This feature requires OVS 3.4 and linux kernel 6.11.
+```
+kubectl -n ovn-kubernetes exec -it <ovnkube-node-pod> -c ovnkube-controller -- ovnkube-observ -h
+Usage of ovnkube-observ:
+  -add-ovs-collector
+    	Add ovs collector to enable sampling. Use with caution. Make sure no one else is using observability.
+  -enable-enrichment
+    	Enrich samples with nbdb data. (default true)
+  -filter-dst-ip string
+    	Filter in only packets to a given destination ip.
+  -filter-src-ip string
+    	Filter in only packets from a given source ip.
+  -log-cookie
+    	Print raw sample cookie with psample group_id.
+  -output-file string
+    	Output file to write the samples to.
+  -print-full-packet
+    	Print full received packet. When false, only src and dst ips are printed with every sample.
+```
-As of Aug 2024, the kernel need to be built from the source, therefore to try this feature you need to:
-- rebuild the kernel with the current master branch from [Linus' tree](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git)
-  - to rebuild on fedora: https://docs.fedoraproject.org/en-US/quick-docs/kernel-build-custom/#_building_a_vanilla_upstream_kernel
-- Build an ovn-kubernetes image that uses the latest OVS/OVN code:
-`OVS_BRANCH=main make -C dist/images fedora-dev-local-gw-deployment`
-- Start kind with that image, use `-ov localhost/ovn-daemonset-fedora:latest` flag with `kind.sh` script.
+This feature requires OVS 3.4 and Linux kernel 6.11.
 
 ## Workflow Description
 
 - Observability is enabled by setting the `--enable-observability` flag in the `ovnkube` binary.
 - For now all mentioned features are enabled by this flag at the same time.
-- `ovnkube-observ` binary is used to see the samples. Samples are only generated when the real traffic matching the ACLs
-is sent through the OVS. An example output is:
+- To start observing and display the samples, run `ovnkube-observ -add-ovs-collector`. Samples are only generated when real traffic matching the ACLs is sent through OVS. An example output is:
+
 ```
 OVN-K message: Allowed by default allow from local node policy, direction ingress
 src=10.129.2.2, dst=10.129.2.5
 ```
 
+## Support in observability tools
+
+- [NetObserv](https://github.com/netobserv/network-observability-operator): through the `NetworkEvents` agent feature.
+
 ## Implementation Details
 
 ### User facing API Changes
diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index 7a4ab3bc9a..826c6ffc8f 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -14,6 +14,7 @@ import ( "text/template" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/urfave/cli/v2" "k8s.io/apimachinery/pkg/util/sets" @@ -269,6 +270,15 @@ func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { return mode, nil } +// Determine if we should serve both ovnkube-node and OVN/OVS metrics on a single endpoint. +func combineMetricsEndpoints(runMode *ovnkubeRunMode) bool { + return runMode != nil && + runMode.node && + config.Metrics.BindAddress != "" && + config.Metrics.BindAddress == config.Metrics.OVNMetricsBindAddress && + config.OvnKubeNode.Mode != types.NodeModeDPUHost +} + func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { pidfile := ctx.String("pidfile") if pidfile != "" { @@ -319,9 +329,9 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { eventRecorder := util.EventRecorder(ovnClientset.KubeClient) - // Start metric server for master and node. Expose the metrics HTTP endpoint if configured. + // Start the general metrics server only when the metrics endpoints are not combined. // Non LE master instances also are required to expose the metrics server. - if config.Metrics.BindAddress != "" { + if config.Metrics.BindAddress != "" && !combineMetricsEndpoints(runMode) { metrics.StartMetricsServer(config.Metrics.BindAddress, config.Metrics.EnablePprof, config.Metrics.NodeServerCert, config.Metrics.NodeServerPrivKey, ctx.Done(), ovnKubeStartWg) } @@ -611,7 +621,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util // start the prometheus server to serve OVS and OVN Metrics (default port: 9476) // Note: for ovnkube node mode dpu-host no metrics is required as ovs/ovn is not running on the node.
- if config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { + if runMode.node && config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { if ovsClient == nil { ovsClient, err = libovsdb.NewOVSClient(ctx.Done()) @@ -631,6 +641,12 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util EnableOVNDBMetrics: true, } + if combineMetricsEndpoints(runMode) { + // Reuse the default registry (and its gatherer) so ovnkube-node metrics and OVN metrics share one endpoint. + opts.Registerer = prometheus.DefaultRegisterer + opts.EnablePprof = config.Metrics.EnablePprof + } + if !config.OVNKubernetesFeature.EnableInterconnect { // In Central mode, OVNKube Node doesn't need to register OVN Northd and DB metrics unless // OVNKube Master Pod is running on this node. diff --git a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go index 1bb5593609..ad9f5e2b50 100644 --- a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go @@ -89,8 +89,9 @@ func (n *HONodeController) AddPod(pod *corev1.Pod) error { _, ok := pod.Annotations[util.OvnPodAnnotationName] if ok { klog.Infof("Remove the ovnkube pod annotation from pod %s", pod.Name) - delete(pod.Annotations, util.OvnPodAnnotationName) - if err := n.kube.UpdatePodStatus(pod); err != nil { + podToUpdate := pod.DeepCopy() + delete(podToUpdate.Annotations, util.OvnPodAnnotationName) + if err := n.kube.UpdatePodStatus(podToUpdate); err != nil { return fmt.Errorf("failed to remove ovnkube pod annotation from pod %s: %v", pod.Name, err) } return nil diff --git a/go-controller/pkg/allocator/id/allocator.go b/go-controller/pkg/allocator/id/allocator.go index ef4900c7ce..20ed9798b4 100644 --- a/go-controller/pkg/allocator/id/allocator.go +++ b/go-controller/pkg/allocator/id/allocator.go @@ -16,7 +16,7 @@ const ( type Allocator interface { AllocateID(name string) (int, error) ReserveID(name string, id int) error - ReleaseID(name string) + ReleaseID(name string) int ForName(name string) NamedAllocator GetID(name string) int } @@ -25,7 +25,7 @@ type Allocator interface { type NamedAllocator interface { AllocateID() (int, error) ReserveID(int) error - ReleaseID() + ReleaseID() int } // idAllocator is used to allocate id for a resource and store the resource - id in a map @@ -90,15 +90,18 @@ func (idAllocator *idAllocator) ReserveID(name string, id int) error { return nil } -// ReleaseID releases the id allocated for the resource 'name' -func (idAllocator *idAllocator) ReleaseID(name string) { +// ReleaseID releases the id allocated for the resource 'name'. +// Returns the released id, or -1 if no id was allocated for that name. 
+func (idAllocator *idAllocator) ReleaseID(name string) int { idAllocator.nameIdMap.LockKey(name) defer idAllocator.nameIdMap.UnlockKey(name) v, ok := idAllocator.nameIdMap.Load(name) if ok { idAllocator.idBitmap.Release(v) idAllocator.nameIdMap.Delete(name) + return v } + return invalidID } func (idAllocator *idAllocator) ForName(name string) NamedAllocator { @@ -129,8 +132,8 @@ func (allocator *namedAllocator) ReserveID(id int) error { return allocator.allocator.ReserveID(allocator.name, id) } -func (allocator *namedAllocator) ReleaseID() { - allocator.allocator.ReleaseID(allocator.name) +func (allocator *namedAllocator) ReleaseID() int { + return allocator.allocator.ReleaseID(allocator.name) } // idsAllocator is used to allocate multiple ids for a resource and store the resource - ids in a map diff --git a/go-controller/pkg/allocator/id/allocator_test.go b/go-controller/pkg/allocator/id/allocator_test.go index 79b783fbf8..2803d6ea81 100644 --- a/go-controller/pkg/allocator/id/allocator_test.go +++ b/go-controller/pkg/allocator/id/allocator_test.go @@ -5,6 +5,43 @@ import ( "testing" ) +func TestIDAllocator_ReleaseID(t *testing.T) { + t.Run("returns allocated ID when releasing", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + id, err := allocator.AllocateID("resource1") + if err != nil { + t.Fatalf("AllocateID() unexpected error: %v", err) + } + + got := allocator.ReleaseID("resource1") + if got != id { + t.Errorf("ReleaseID() = %d, want %d", got, id) + } + if allocator.GetID("resource1") != -1 { + t.Error("GetID() should return -1 after release") + } + }) + + t.Run("returns -1 when releasing already released resource", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + if _, err := allocator.AllocateID("resource1"); err != nil { + t.Fatalf("AllocateID() unexpected error: %v", err) + } + allocator.ReleaseID("resource1") + + if got := allocator.ReleaseID("resource1"); got != -1 { + t.Errorf("ReleaseID() = %d, want -1", got) + } + }) + + t.Run("returns -1 when releasing non-existent resource", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + if got := allocator.ReleaseID("nonexistent"); got != -1 { + t.Errorf("ReleaseID() = %d, want -1", got) + } + }) +} + func TestIDsAllocator(t *testing.T) { // create allocator with range [3, 8] allocator := newIDsAllocator("test", 6, 3) diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 41b1f6d3f9..8159c39da7 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -64,8 +64,9 @@ func (a *idAllocatorStub) ReserveID(int) error { return a.reserveIDError } -func (a *idAllocatorStub) ReleaseID() { +func (a *idAllocatorStub) ReleaseID() int { a.releasedID = true + return a.nextID } type persistentIPsStub struct { diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 7d3d1b44c8..12ce158aa0 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -23,6 +23,7 @@ import ( udntemplate "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" + vtepinformer 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" @@ -160,6 +161,10 @@ func NewClusterManager( } if util.IsNetworkSegmentationSupportEnabled() { + var vtepInformer vtepinformer.VTEPInformer + if util.IsEVPNEnabled() { + vtepInformer = wf.VTEPInformer() + } udnController := udncontroller.New( ovnClient.NetworkAttchDefClient, wf.NADInformer(), ovnClient.UserDefinedNetworkClient, @@ -168,6 +173,7 @@ func NewClusterManager( cm.networkManager.Interface(), wf.PodCoreInformer(), wf.NamespaceInformer(), + vtepInformer, cm.recorder, ) cm.userDefinedNetworkController = udnController diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index 7b0f2a2e39..891ec2c0ce 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -531,7 +531,26 @@ func (eIPC *egressIPClusterController) getSortedEgressData() ([]*egressNode, map return assignableNodes, allAllocations } -func (eIPC *egressIPClusterController) initEgressNodeReachability(_ []interface{}) error { +func (eIPC *egressIPClusterController) initEgressNodeReachability(objs []interface{}) error { + for _, obj := range objs { + node := obj.(*corev1.Node) + if err := eIPC.initEgressIPAllocator(node); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + } + + // Before reconciling unassigned EgressIPs, ensure the allocator cache is populated + // with existing assignments from EgressIP statuses. This prevents duplicate IP + // assignments when two EgressIPs have the same IP in their specs but only one has + // it assigned in status (e.g., after control-plane restart or during initial sync). + egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + for _, egressIP := range egressIPs { + eIPC.ensureAllocatorEgressIPAssignments(egressIP) + } + go eIPC.checkEgressNodesReachability() return nil } @@ -990,11 +1009,6 @@ func (eIPC *egressIPClusterController) reconcileEgressIP(old, new *egressipv1.Eg statusToRemove = append(statusToRemove, status) ipsToRemove.Insert(status.EgressIP) } - // Adding the mark to annotations is bundled with status update in-order to minimise updates, cover the case where there is no update to status - // and mark annotation has been modified / removed. This should only occur for an update and the mark was previous set. 
- if ipsToAssign.Len() == 0 && ipsToRemove.Len() == 0 { - eIPC.ensureMark(old, new) - } if ipsToRemove.Len() > 0 { // The following is added as to ensure that we only add after having @@ -1228,7 +1242,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "EgressIPConflict", "Egress IP %s with IP "+ "%v is conflicting with a host (%s) IP address and will not be assigned", name, eIP, conflictedHost) klog.Errorf("Egress IP: %v address is already assigned on an interface on node %s", eIP, conflictedHost) - return assignments + continue } if status, exists := existingAllocations[eIP.String()]; exists { // On public clouds we will re-process assignments for the same IP @@ -1280,7 +1294,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] "IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node, ) klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node) - return assignments + continue } } // Egress IP for secondary host networks is only available on baremetal environments @@ -1825,10 +1839,21 @@ func generateStatusPatchOp(statusItems []egressipv1.EgressIPStatusItem) jsonPatc } } +// ensureAllocatorEgressIPAssignments adds EgressIP assignments to the allocator cache +// if the EgressIP has status items. This is critical to prevent duplicate IP assignments +// during restart when EgressIPs are processed in arbitrary order. +func (eIPC *egressIPClusterController) ensureAllocatorEgressIPAssignments(egressIP *egressipv1.EgressIP) { + if len(egressIP.Status.Items) > 0 { + eIPC.addAllocatorEgressIPAssignments(egressIP.Name, egressIP.Status.Items) + } +} + // syncEgressIPMarkAllocator iterates over all existing EgressIPs. It builds a mark cache of existing marks stored on each -// EgressIP annotation or allocates and adds a new mark to an EgressIP if it doesn't exist +// EgressIP annotation or allocates and adds a new mark to an EgressIP if it doesn't exist. func (eIPC *egressIPClusterController) syncEgressIPMarkAllocator(egressIPs []interface{}) error { - // reserve previously assigned marks + // Reserve previously assigned marks. Note: the allocator cache is pre-populated with + // existing assignments from EgressIP statuses in initEgressNodeReachability, which runs + // before this sync function. for _, object := range egressIPs { egressIP, ok := object.(*egressipv1.EgressIP) if !ok { @@ -1880,22 +1905,6 @@ func getEgressIPMarkAllocator() id.Allocator { return id.NewIDAllocator("eip_mark", eipMarkMax-eipMarkMin) } -// ensureMark ensures that if a mark was remove or changed value, then restore the mark. -func (eIPC *egressIPClusterController) ensureMark(old, new *egressipv1.EgressIP) { - // Adding the mark to annotations is bundled with status update in-order to minimise updates, cover the case where there is no update to status - // and mark annotation has been modified / removed. This should only occur for an update and the mark was previous set. 
- if old != nil && new != nil { - if util.IsEgressIPMarkSet(old.Annotations) && util.EgressIPMarkAnnotationChanged(old.Annotations, new.Annotations) { - mark, _, err := eIPC.getOrAllocMark(new.Name) - if err != nil { - klog.Errorf("Failed to restore EgressIP %s mark because unable to retrieve mark: %v", new.Name, err) - } else if err = eIPC.patchEgressIP(new.Name, generateMarkPatchOp(mark)); err != nil { - klog.Errorf("Failed to restore EgressIP %s mark because patching failed: %v", new.Name, err) - } - } - } -} - // getOrAllocMark allocates a new mark integer for name using round-robin strategy if none was already allocated for name otherwise // returns the previously allocated mark. // The mark is bounded by util.EgressIPMarkBase & util.EgressIPMarkMax inclusive. diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go index c89b8b58cf..95d06bc0b1 100644 --- a/go-controller/pkg/clustermanager/egressip_controller_test.go +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -22,7 +22,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/util/retry" utilnet "k8s.io/utils/net" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -1368,12 +1367,23 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { I0212 20:22:37.643187 1837759 egressip_controller.go:1173] Current assignments are: map[] I0212 20:22:37.643205 1837759 egressip_controller.go:1175] Will attempt assignment for egress IP: 192.168.126.51 E0212 20:22:37.643254 1837759 egressip_controller.go:1190] Egress IP: 192.168.126.51 address is already assigned on an interface on node node2*/ - gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(4)) - for i := 0; i < 4; i++ { + gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(8)) + conflictCount := 0 + noMatchingCount := 0 + for i := 0; i < 8; i++ { recordedEvent := <-fakeClusterManagerOVN.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring( - "EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned")) + gomega.Expect(recordedEvent).To(gomega.SatisfyAny( + gomega.ContainSubstring("EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned"), + gomega.ContainSubstring("NoMatchingNodeFound No matching nodes found, which can host any of the egress IPs: [192.168.126.51] for object EgressIP: egressip"))) + if strings.Contains(recordedEvent, "EgressIPConflict") { + conflictCount++ + } + if strings.Contains(recordedEvent, "NoMatchingNodeFound") { + noMatchingCount++ + } } + gomega.Expect(conflictCount).To(gomega.Equal(4)) + gomega.Expect(noMatchingCount).To(gomega.Equal(4)) return nil } @@ -3288,24 +3298,7 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { assignedMark, err := strconv.Atoi(assignedMarkStr) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to convert mark to string") - ginkgo.By("clear mark to cause update and expect restoration of mark") - gomega.Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { - eIP, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - if err != nil { - return err - } - 
eIP.Annotations = map[string]string{} - _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIP, metav1.UpdateOptions{}) - return err - })).ShouldNot(gomega.HaveOccurred(), "failed to update EgressIP object") - ginkgo.By("confirm the original mark is restored") - gomega.Eventually(getEgressIPAnnotationValue(eIP.Name)).ShouldNot(gomega.BeEmpty()) - assignedMarkStr, err = getEgressIPAnnotationValue(eIP.Name)() - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to get egress IP mark from annotations") - assignedMarkAfterUpdate, err := strconv.Atoi(assignedMarkStr) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to convert mark to string") - gomega.Expect(assignedMark).Should(gomega.Equal(assignedMarkAfterUpdate), "Mark should be identical if annotation is cleared") - ginkgo.By("confirm cache is unchanged") + ginkgo.By("confirm cache is set correctly") cachedMark, _, err := fakeClusterManagerOVN.eIPC.getOrAllocMark(eIP.Name) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Expect(cachedMark).Should(gomega.Equal(assignedMark), "EIP annotation and cache mark integer must be the same") @@ -4218,6 +4211,122 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + // This test validates that when two EgressIP CRs have the same IP in their specs, + // and one already has the IP assigned in status (from before restart), the sync + // function properly pre-populates the allocator cache to prevent duplicate assignment. + // This is a regression test for the bug where duplicate IPs were assigned during + // control-plane pod restart because the allocator cache wasn't populated from + // existing EgressIP statuses before processing individual ADD events. 
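Aside (not part of the patch): a pared-down sketch of the ordering hazard the regression test that follows covers. Without seeding the cache from statuses first, the outcome depends on which EgressIP is processed first. All types here are hypothetical reductions of the real controller state:

package main

import "fmt"

// egressIP is a pared-down view of the CR: a name, the spec IP, and whether
// the status already holds an assignment.
type egressIP struct {
	name       string
	specIP     string
	assignedTo string
}

// cache maps ip -> owning EgressIP name, like the allocator cache.
type cache map[string]string

// seedFromStatuses models ensureAllocatorEgressIPAssignments: record every
// status-held IP before any unassigned EgressIP is reconciled.
func (c cache) seedFromStatuses(eips []egressIP) {
	for _, e := range eips {
		if e.assignedTo != "" {
			c[e.specIP] = e.name
		}
	}
}

// tryAssign refuses an IP that another EgressIP already owns.
func (c cache) tryAssign(e egressIP) bool {
	if owner, taken := c[e.specIP]; taken && owner != e.name {
		return false
	}
	c[e.specIP] = e.name
	return true
}

func main() {
	eips := []egressIP{
		{name: "egressip-1", specIP: "192.168.126.101", assignedTo: "node1"},
		{name: "egressip-2", specIP: "192.168.126.101"}, // same IP, no status
	}
	c := cache{}
	// Without this seeding step, processing egressip-2 first would grab the IP
	// and egressip-1's existing assignment would become a duplicate.
	c.seedFromStatuses(eips)
	for _, e := range eips {
		fmt.Printf("%s assigned: %v\n", e.name, c.tryAssign(e))
	}
}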
+ ginkgo.It("should not assign duplicate IP during restart when two EgressIPs have same IP in spec", func() { + app.Action = func(*cli.Context) error { + duplicateIP := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.51/24" + + node1 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + node2 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\": [\"%s\",\"%s\"]}", v4NodeSubnet, v6NodeSubnet), + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + + // eIP1 has the IP assigned in status (simulating state from before restart) + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip-1"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{duplicateIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: duplicateIP, + Node: node1Name, + }, + }, + }, + } + + // eIP2 has the same IP in spec but NOT in status (unassigned, but was created + // with duplicate IP - which should have been rejected but wasn't due to a bug + // or manual API manipulation) + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip-2"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{duplicateIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &corev1.NodeList{Items: []corev1.Node{node1, node2}}, + // Both EgressIPs exist at startup - simulating restart scenario + &egressipv1.EgressIPList{Items: []egressipv1.EgressIP{eIP1, eIP2}}, + ) + + // Use WatchEgressNodes to properly initialize the allocator cache + // (simulating real startup behavior rather than manually setting up cache) + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // eIP1 should keep its assignment (the IP was already assigned) + gomega.Eventually(getEgressIPStatusLen("egressip-1")).Should(gomega.Equal(1)) + egressIPs1, nodes1 := getEgressIPStatus("egressip-1") + gomega.Expect(nodes1[0]).To(gomega.Equal(node1Name)) + gomega.Expect(egressIPs1[0]).To(gomega.Equal(duplicateIP)) + + // eIP2 should NOT get the duplicate IP assigned (not even to node2) - + // it should remain unassigned because initEgressNodeReachability pre-populated the + // cache with eIP1's assignment + gomega.Eventually(getEgressIPStatusLen("egressip-2")).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("AddEgressIP for IPv4", func() { diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go index 66f95f1e83..6e97f0a7d0 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go @@ -252,7 +252,7 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e return err } - if namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { return nil } diff --git a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go index 89e0eb2b8b..a48036e6ab 100644 --- a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go +++ b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go @@ -38,8 +38,8 @@ var ( func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceName string, nadLister nadlisters.NetworkAttachmentDefinitionLister) (nadKey string, network util.NetInfo, err error) { namespacePrimaryNetwork, err := networkMgr.GetActiveNetworkForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { - // We intentionally ignore the unprocessed active network error because + if util.IsInvalidPrimaryNetworkError(err) { + // We intentionally ignore the invalid primary network error because // UDN Controller hasn't created the NAD yet, OR NAD doesn't exist in a // namespace that has the required UDN label. It could also be that the // UDN was deleted and the NAD is also gone. 
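Aside (not part of the patch): the hunks around here converge on one contract for GetActiveNetworkForNamespace — "invalid primary network" errors are tolerated, and a nil network (rather than a dedicated "unprocessed" error) now means "no primary UDN yet". A minimal sketch of that control flow, with all names illustrative stand-ins for util.NetInfo and the network manager:

package main

import (
	"errors"
	"fmt"
)

type netInfo struct{ name string }

var errInvalidPrimaryNetwork = errors.New("invalid primary network")

// getActiveNetwork models the revised contract: (nil, nil) means "no primary
// UDN for this namespace yet", so every caller must nil-check the result.
func getActiveNetwork(namespace string) (*netInfo, error) {
	switch namespace {
	case "pending":
		return nil, nil // NAD not created yet
	case "broken":
		return nil, errInvalidPrimaryNetwork
	default:
		return &netInfo{name: "cluster_udn_" + namespace}, nil
	}
}

func reconcileNamespace(namespace string) error {
	network, err := getActiveNetwork(namespace)
	if err != nil {
		if errors.Is(err, errInvalidPrimaryNetwork) {
			return nil // tolerated: UDN/NAD may be gone or not yet labelled
		}
		return err
	}
	if network == nil {
		return nil // no primary network for this namespace; nothing to do
	}
	fmt.Printf("reconciling %s on network %s\n", namespace, network.name)
	return nil
}

func main() {
	for _, ns := range []string{"pending", "broken", "blue"} {
		fmt.Println(ns, "->", reconcileNamespace(ns))
	}
}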
@@ -47,13 +47,13 @@ func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceNam } return "", nil, err } - if namespacePrimaryNetwork.IsDefault() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() { // No primary UDN in this namespace return "", nil, nil } primaryNADKey, err := networkMgr.GetPrimaryNADForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { return "", nil, nil } return "", nil, err diff --git a/go-controller/pkg/clustermanager/networkconnect/controller.go b/go-controller/pkg/clustermanager/networkconnect/controller.go index 8bec1787ee..654f69d33d 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller.go @@ -407,16 +407,19 @@ func (c *Controller) mustProcessCNCForNAD(nad *nadv1.NetworkAttachmentDefinition continue } for _, namespace := range namespaces { - primaryNAD, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) + nsPrimaryNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { continue } klog.Errorf("Failed to get active network for namespace %s: %v", namespace.Name, err) continue } + if nsPrimaryNetwork == nil { + continue + } networkName := c.networkManager.GetNetworkNameForNADKey(nadKey) - if networkName != "" && networkName == primaryNAD.GetNetworkName() { + if networkName != "" && networkName == nsPrimaryNetwork.GetNetworkName() { isSelected = true break selectorLoop } diff --git a/go-controller/pkg/clustermanager/node/node_allocator.go b/go-controller/pkg/clustermanager/node/node_allocator.go index e31625b725..5f1ba6ccc5 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator.go +++ b/go-controller/pkg/clustermanager/node/node_allocator.go @@ -343,7 +343,6 @@ func (na *NodeAllocator) syncNodeNetworkAnnotations(node *corev1.Node) error { func (na *NodeAllocator) HandleDeleteNode(node *corev1.Node) error { if na.hasHybridOverlayAllocation() { na.releaseHybridOverlayNodeSubnet(node.Name) - return nil } if na.hasNodeSubnetAllocation() || na.hasHybridOverlayAllocationUnmanaged() { diff --git a/go-controller/pkg/clustermanager/node/node_allocator_test.go b/go-controller/pkg/clustermanager/node/node_allocator_test.go index 37fee60d64..acdbc137bb 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/node_allocator_test.go @@ -12,7 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/listers/core/v1" + listersv1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" @@ -400,12 +400,12 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { } } -func newFakeNodeLister(nodes []*corev1.Node) v1.NodeLister { +func newFakeNodeLister(nodes []*corev1.Node) listersv1.NodeLister { indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) for _, node := range nodes { _ = indexer.Add(node) } - return v1.NewNodeLister(indexer) + return listersv1.NewNodeLister(indexer) } func TestController_CleanupStaleAnnotation(t *testing.T) { @@ -448,3 +448,105 @@ func 
TestController_CleanupStaleAnnotation(t *testing.T) { t.Fatalf("Expected annotation %s to be cleaned up, got %v", util.OVNNodeGRLRPAddrs, nodes.Items[0].Annotations) } } + +// TestNodeAllocator_HandleDeleteNode verifies that HandleDeleteNode correctly releases +// both standard cluster subnets and hybrid overlay subnets (if enabled) when a node is deleted. +func TestNodeAllocator_HandleDeleteNode(t *testing.T) { + origHybridEnabled := config.HybridOverlay.Enabled + origHybridSubnets := config.HybridOverlay.ClusterSubnets + origClusterSubnets := config.Default.ClusterSubnets + origNoHostSubnetNodes := config.Kubernetes.NoHostSubnetNodes + t.Cleanup(func() { + config.HybridOverlay.Enabled = origHybridEnabled + config.HybridOverlay.ClusterSubnets = origHybridSubnets + config.Default.ClusterSubnets = origClusterSubnets + config.Kubernetes.NoHostSubnetNodes = origNoHostSubnetNodes + }) + + config.HybridOverlay.Enabled = true + config.HybridOverlay.ClusterSubnets = []config.CIDRNetworkEntry{ + {CIDR: ovntest.MustParseIPNet("10.0.0.0/16"), HostSubnetLength: 24}, + } + + ranges, err := rangesFromStrings([]string{"172.16.0.0/16"}, []int{24}) + if err != nil { + t.Fatal(err) + } + config.Default.ClusterSubnets = ranges + + netInfo, err := util.NewNetInfo( + &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: types.DefaultNetworkName}, + }, + ) + if err != nil { + t.Fatal(err) + } + + na := &NodeAllocator{ + netInfo: netInfo, + clusterSubnetAllocator: NewSubnetAllocator(), + nodeLister: newFakeNodeLister([]*corev1.Node{}), + } + if na.hasHybridOverlayAllocation() { + na.hybridOverlaySubnetAllocator = NewSubnetAllocator() + } + + if !na.hasHybridOverlayAllocation() { + t.Fatal("Hybrid overlay allocation should be enabled given the test configuration") + } + + if err := na.Init(); err != nil { + t.Fatalf("Failed to initialize node allocator: %v", err) + } + + nodeName := "node-delete-test" + if !na.hasNodeSubnetAllocation() { + t.Fatal("Node subnet allocation should be enabled") + } + + allocated, _, err := na.allocateNodeSubnets(na.clusterSubnetAllocator, nodeName, nil, true, false) + if err != nil { + t.Fatalf("Failed to allocate subnet: %v", err) + } + if len(allocated) == 0 { + t.Fatal("No subnet allocated") + } + + v4used, _ := na.clusterSubnetAllocator.Usage() + if v4used != 1 { + t.Fatalf("Expected 1 allocated subnet, got %d", v4used) + } + + if na.hasHybridOverlayAllocation() { + if _, _, err := na.allocateNodeSubnets(na.hybridOverlaySubnetAllocator, nodeName, nil, true, false); err != nil { + t.Fatalf("Failed to allocate hybrid overlay subnet: %v", err) + } + hoUsed, _ := na.hybridOverlaySubnetAllocator.Usage() + if hoUsed != 1 { + t.Fatalf("Expected 1 allocated hybrid overlay subnet, got %d", hoUsed) + } + } + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + } + + if err := na.HandleDeleteNode(node); err != nil { + t.Fatalf("HandleDeleteNode failed: %v", err) + } + + v4usedAfter, _ := na.clusterSubnetAllocator.Usage() + if v4usedAfter != 0 { + t.Errorf("Subnet leak detected! Expected 0 allocated subnets, got %d", v4usedAfter) + } + + if na.hasHybridOverlayAllocation() { + hoUsedAfter, _ := na.hybridOverlaySubnetAllocator.Usage() + if hoUsedAfter != 0 { + t.Errorf("Hybrid overlay subnet leak detected! 
Expected 0 allocated subnets, got %d", hoUsedAfter) + } + } +} diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 5e5e65f25d..ab377aa759 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -113,11 +113,17 @@ func (a *PodAllocator) Init() error { func (a *PodAllocator) getActiveNetworkForPod(pod *corev1.Pod) (util.NetInfo, error) { activeNetwork, err := a.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return nil, err } + // Cluster manager pod allocation should always have an active network + if activeNetwork == nil { + newErr := fmt.Errorf("no active network found for pod %s/%s", pod.Namespace, pod.Name) + a.recordPodErrorEvent(pod, newErr) + return nil, newErr + } return activeNetwork, nil } @@ -131,7 +137,7 @@ func (a *PodAllocator) GetNetworkRole(pod *corev1.Pod) (string, error) { pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return "", err diff --git a/go-controller/pkg/clustermanager/pod/allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go index 987c401508..e11f287d8f 100644 --- a/go-controller/pkg/clustermanager/pod/allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -147,8 +147,9 @@ func (a *idAllocatorStub) ReserveID(string, int) error { panic("not implemented") // TODO: Implement } -func (a *idAllocatorStub) ReleaseID(string) { +func (a *idAllocatorStub) ReleaseID(string) int { a.released = true + return 0 } func (a *idAllocatorStub) ForName(string) id.NamedAllocator { diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller.go b/go-controller/pkg/clustermanager/routeadvertisements/controller.go index 463a5d0d67..75ce469089 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller.go @@ -50,6 +50,8 @@ import ( const ( generateName = "ovnk-generated-" fieldManager = "clustermanager-routeadvertisements-controller" + // evpnRawConfigPriority is set to an arbitrary value that still allows users to override EVPN config if needed. 
+ evpnRawConfigPriority = 10 ) var ( @@ -324,6 +326,33 @@ type selectedNetworks struct { prefixLength map[string]uint32 // networkType is a map of selected network to their topology networkTopology map[string]string + // macVRFConfigs is an ordered list of MAC-VRF EVPN configurations for selected networks + macVRFConfigs []*vrfConfig + // ipVRFConfigs is an ordered list of IP-VRF EVPN configurations for selected networks + ipVRFConfigs []*ipVRFConfig + // networkTransport is a map of selected network to their transport mode + networkTransport map[string]string +} + +// vrfConfig holds base VRF EVPN configuration for a network +type vrfConfig struct { + // VNI is the VXLAN Network Identifier + VNI int32 + // RouteTarget is the BGP route target, empty means use FRR defaults + RouteTarget string +} + +// ipVRFConfig holds IP-VRF EVPN configuration for a network +type ipVRFConfig struct { + vrfConfig + // NetworkName is the name of the network this config belongs to + NetworkName string + // VRFName is the Linux VRF name + VRFName string + // HasIPv4 indicates if the network has IPv4 subnets + HasIPv4 bool + // HasIPv6 indicates if the network has IPv6 subnets + HasIPv6 bool } // generateFRRConfigurations generates FRRConfigurations for the route @@ -351,10 +380,11 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) // validate and gather information about the networks networkSet := sets.New[string]() selectedNetworks := &selectedNetworks{ - networkVRFs: map[string]string{}, - networkSubnets: map[string][]string{}, - prefixLength: map[string]uint32{}, - networkTopology: map[string]string{}, + networkVRFs: map[string]string{}, + networkSubnets: map[string][]string{}, + prefixLength: map[string]uint32{}, + networkTopology: map[string]string{}, + networkTransport: map[string]string{}, } for _, nad := range nads { networkName := util.GetAnnotatedNetworkName(nad) @@ -385,6 +415,43 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) selectedNetworks.vrfs = append(selectedNetworks.vrfs, vrf) selectedNetworks.networkVRFs[vrf] = networkName selectedNetworks.networkTopology[networkName] = network.TopologyType() + selectedNetworks.networkTransport[networkName] = network.Transport() + + // MAC-VRF configuration + if macVNI := network.EVPNMACVRFVNI(); macVNI > 0 { + selectedNetworks.macVRFConfigs = append(selectedNetworks.macVRFConfigs, &vrfConfig{ + VNI: macVNI, + RouteTarget: network.EVPNMACVRFRouteTarget(), + }) + } + + // IP-VRF configuration + if ipVNI := network.EVPNIPVRFVNI(); ipVNI > 0 { + // Compute IP families from network subnets + hasIPv4, hasIPv6 := false, false + for _, subnet := range network.Subnets() { + if subnet.CIDR.IP.To4() == nil { + hasIPv6 = true + } else { + hasIPv4 = true + } + } + selectedNetworks.ipVRFConfigs = append(selectedNetworks.ipVRFConfigs, &ipVRFConfig{ + vrfConfig: vrfConfig{ + VNI: ipVNI, + RouteTarget: network.EVPNIPVRFRouteTarget(), + }, + NetworkName: networkName, + VRFName: vrf, + HasIPv4: hasIPv4, + HasIPv6: hasIPv6, + }) + } + hasEVPNConfig := network.EVPNMACVRFVNI() > 0 || network.EVPNIPVRFVNI() > 0 + if hasEVPNConfig && ra.Spec.TargetVRF != "auto" && ra.Spec.TargetVRF != vrf { + return nil, nil, fmt.Errorf("%w: EVPN network %q with VRF %q requires TargetVRF to be 'auto' or %q, got %q", + errConfig, networkName, vrf, vrf, ra.Spec.TargetVRF) + } // TODO check overlaps? 
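Aside (not part of the patch, which continues with the subnet loop below): the HasIPv4/HasIPv6 computation above boils down to net.IP.To4() being non-nil for IPv4. A self-contained illustration, using net.ParseCIDR as a stand-in for the already-parsed network.Subnets():

package main

import (
	"fmt"
	"net"
)

// families reports which IP families a set of subnets covers; these flags
// drive the "advertise ipv4/ipv6 unicast" lines generated later.
func families(cidrs []string) (hasIPv4, hasIPv6 bool) {
	for _, c := range cidrs {
		_, ipnet, err := net.ParseCIDR(c)
		if err != nil {
			continue
		}
		if ipnet.IP.To4() == nil {
			hasIPv6 = true
		} else {
			hasIPv4 = true
		}
	}
	return
}

func main() {
	v4, v6 := families([]string{"10.2.0.0/16", "fd00::/64"})
	fmt.Println(v4, v6) // true true: a dual-stack network advertises both families
}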
for _, cidr := range network.Subnets() { subnet := cidr.CIDR.String() @@ -399,6 +466,8 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) // ordered slices.Sort(selectedNetworks.vrfs) slices.Sort(selectedNetworks.subnets) + slices.SortFunc(selectedNetworks.macVRFConfigs, func(a, b *vrfConfig) int { return int(a.VNI - b.VNI) }) + slices.SortFunc(selectedNetworks.ipVRFConfigs, func(a, b *ipVRFConfig) int { return int(a.VNI - b.VNI) }) selectedNetworks.networks = sets.List(networkSet) // gather selected nodes @@ -435,6 +504,8 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) if len(frrConfigs) == 0 { return nil, nil, fmt.Errorf("%w: no FRRConfigurations selected", errPending) } + + frrRouterVRFs := sets.New[string]() for _, frrConfig := range frrConfigs { if strings.HasPrefix(frrConfig.Name, generateName) { klog.V(4).Infof("Skipping FRRConfiguration %q selected by RouteAdvertisements %q as it was generated by ovn-kubernetes", frrConfig.Name, ra.Name) @@ -455,6 +526,27 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) } nodeToFRRConfig[node.Name] = append(nodeToFRRConfig[node.Name], frrConfig) } + for _, router := range frrConfig.Spec.BGP.Routers { + frrRouterVRFs.Insert(router.VRF) + } + } + + // Validate EVPN configuration requirements + hasEVPNConfig := len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 + if hasEVPNConfig && !util.IsEVPNEnabled() { + return nil, nil, fmt.Errorf("%w: EVPN networks selected but EVPN feature is not enabled", errConfig) + } + // Require a router with default VRF for any EVPN configuration, since the + // global EVPN section with advertise-all-vni is required for EVPN to work properly. + if hasEVPNConfig && !frrRouterVRFs.Has("") { + return nil, nil, fmt.Errorf("%w: EVPN requires a router with default VRF but none were found in selected FRRConfigurations", errConfig) + } + // Validate IP-VRF networks: each needs either an existing VRF router or + // the default VRF router to create one from. 
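Aside (not part of the patch): the two validation rules just described — EVPN always needs a default-VRF router, and each IP-VRF network needs either its own VRF router or a default-VRF router to derive one from — reduce to simple set membership checks. A sketch under those assumptions (a plain map stands in for sets.Set, and the empty string denotes the default VRF, as in the patch):

package main

import "fmt"

type ipVRF struct {
	networkName string
	vrfName     string
}

// validateEVPN models the checks performed in generateFRRConfigurations over
// the VRFs collected from all routers in the selected FRRConfigurations.
func validateEVPN(routerVRFs map[string]bool, hasEVPN bool, ipVRFs []ipVRF) error {
	if hasEVPN && !routerVRFs[""] {
		return fmt.Errorf("EVPN requires a router with default VRF")
	}
	for _, v := range ipVRFs {
		// Either the VRF router already exists, or one can be created from the
		// default-VRF router's ASN.
		if !routerVRFs[v.vrfName] && !routerVRFs[""] {
			return fmt.Errorf("IP-VRF network %q needs a router with VRF %q or a default-VRF router", v.networkName, v.vrfName)
		}
	}
	return nil
}

func main() {
	withGlobal := map[string]bool{"": true} // a default-VRF router exists
	fmt.Println(validateEVPN(withGlobal, true, []ipVRF{{networkName: "blue", vrfName: "blue"}})) // nil

	vrfOnly := map[string]bool{"red": true} // only a VRF-scoped router
	fmt.Println(validateEVPN(vrfOnly, true, nil)) // fails: no default-VRF router
}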
+ for _, cfg := range selectedNetworks.ipVRFConfigs { + if !frrRouterVRFs.Has(cfg.VRFName) && !frrRouterVRFs.Has("") { + return nil, nil, fmt.Errorf("%w: IP-VRF EVPN network %q requires a router with VRF %q or a router with default VRF, but none were found in selected FRRConfigurations", errConfig, cfg.NetworkName, cfg.VRFName) + } } // helper to gather host subnets and cache during reconcile @@ -562,6 +654,7 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) nodeName, selectedNetworks, matchedNetworks, + frrRouterVRFs, ) if err != nil { return nil, nil, err @@ -591,8 +684,9 @@ func (c *Controller) generateFRRConfiguration( nodeName string, selectedNetworks *selectedNetworks, matchedNetworks sets.Set[string], + frrRouterVRFs sets.Set[string], ) (*frrtypes.FRRConfiguration, error) { - routers := []frrtypes.Router{} + var routers []frrtypes.Router // go over the source routers for i, router := range source.Spec.BGP.Routers { @@ -670,6 +764,32 @@ func (c *Controller) generateFRRConfiguration( Prefixes: advertisePrefixes, }, } + + // For no-overlay networks, add routes to pod subnets to the accepted routes list + // frr-k8s will merge the prefixes from both the generated and the base FRRConfiguration + if selectedNetworks.networkTransport[matchedNetwork] == types.NetworkTransportNoOverlay { + // Get the pod subnets for this network (the network subnets, not host subnets) + podSubnets := selectedNetworks.networkSubnets[matchedNetwork] + if len(podSubnets) > 0 { + // Filter pod subnets by IP family to match the neighbor + filteredPodSubnets := util.MatchAllIPNetsStringFamily(isIPV6, podSubnets) + if len(filteredPodSubnets) > 0 { + neighbor.ToReceive = frrtypes.Receive{ + Allowed: frrtypes.AllowedInPrefixes{ + Mode: frrtypes.AllowRestricted, + }, + } + for _, subnet := range filteredPodSubnets { + neighbor.ToReceive.Allowed.Prefixes = append(neighbor.ToReceive.Allowed.Prefixes, frrtypes.PrefixSelector{ + Prefix: subnet, + LE: selectedNetworks.prefixLength[subnet], + GE: selectedNetworks.prefixLength[subnet], + }) + } + } + } + } + targetRouter.Neighbors = append(targetRouter.Neighbors, neighbor) } if len(targetRouter.Neighbors) == 0 { @@ -720,11 +840,65 @@ func (c *Controller) generateFRRConfiguration( routers = append(routers, importRouter) } } - if len(routers) == 0 { - // we ended up with no routers, bail out - return nil, nil + var globalRouterASN uint32 + var neighbors []string + vrfASNs := map[string]uint32{} + + if len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 { + // Look for global router in the source FRRConfiguration, not in the filtered routers + for _, router := range source.Spec.BGP.Routers { + if router.VRF == "" { // default VRF + globalRouterASN = router.ASN + for _, neighbor := range router.Neighbors { + neighbors = append(neighbors, neighbor.Address) + } + break + } + } + } + + // For IP-VRF: Find or create routers for each EVPN network's VRF. + // IP-VRF routers don't need neighbors for EVPN (they use the global router's neighbors). 
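Aside (not part of the patch): the loop that follows implements a find-or-create step — reuse the ASN of an existing router for an IP-VRF when the source configuration has one, otherwise derive a new router from the global (default-VRF) ASN. A stripped-down model of that decision, with hypothetical types:

package main

import "fmt"

type router struct {
	asn uint32
	vrf string
}

// ensureVRFRouters reuses an existing router per VRF when present in the
// source, and otherwise creates one with the global ASN.
func ensureVRFRouters(source []router, globalASN uint32, vrfs []string) []router {
	byVRF := map[string]router{}
	for _, r := range source {
		byVRF[r.vrf] = r
	}
	var out []router
	for _, vrf := range vrfs {
		if r, ok := byVRF[vrf]; ok {
			out = append(out, r) // reuse the ASN the admin configured for this VRF
		} else if globalASN > 0 {
			out = append(out, router{asn: globalASN, vrf: vrf}) // create with global ASN
		}
		// If neither exists, another source FRRConfiguration is expected to
		// provide the router, matching the comment in the real loop.
	}
	return out
}

func main() {
	source := []router{{asn: 65000, vrf: ""}, {asn: 65100, vrf: "blue"}}
	fmt.Println(ensureVRFRouters(source, 65000, []string{"blue", "red"}))
	// => [{65100 blue} {65000 red}]: blue reuses 65100, red is created with 65000
}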
+ for _, cfg := range selectedNetworks.ipVRFConfigs { + if frrRouterVRFs.Has(cfg.VRFName) { + // VRF router exists somewhere - check if it's in the current source + for _, router := range source.Spec.BGP.Routers { + if router.VRF == cfg.VRFName { + vrfASNs[cfg.VRFName] = router.ASN + if !slices.ContainsFunc(routers, func(r frrtypes.Router) bool { return r.VRF == cfg.VRFName }) { + routers = append(routers, frrtypes.Router{ + ASN: router.ASN, + VRF: cfg.VRFName, + Prefixes: selectedNetworks.hostNetworkSubnets[cfg.NetworkName], + }) + } + break + } + } + // If not in current source, another source will handle it + } else if globalRouterASN > 0 { + // VRF router doesn't exist anywhere - create with global ASN + klog.Infof("Creating router for EVPN network %q VRF %q with ASN=%d, prefixes=%v", + cfg.NetworkName, cfg.VRFName, globalRouterASN, selectedNetworks.hostNetworkSubnets[cfg.NetworkName]) + matchedNetworks.Insert(cfg.NetworkName) + vrfASNs[cfg.VRFName] = globalRouterASN + routers = append(routers, frrtypes.Router{ + ASN: globalRouterASN, + VRF: cfg.VRFName, + Prefixes: selectedNetworks.hostNetworkSubnets[cfg.NetworkName], + }) + } } + // Check if we have anything to generate: routers or EVPN raw config. + // EVPN raw config is generated when we have: + // - A global router (globalRouterASN > 0 && len(neighbors) > 0) for the global EVPN section + // - IP-VRF configs for VRF VNI and VRF EVPN sections + hasEVPNRawConfig := (globalRouterASN > 0 && len(neighbors) > 0) || len(selectedNetworks.ipVRFConfigs) > 0 + if len(routers) == 0 && !hasEVPNRawConfig { + // we ended up with no routers and no EVPN raw config to generate, bail out + return nil, nil + } new := &frrtypes.FRRConfiguration{} new.GenerateName = generateName new.Namespace = source.Namespace @@ -748,6 +922,18 @@ func (c *Controller) generateFRRConfiguration( }, } + // Generate EVPN raw config for the EVPN-specific parts. 
+ // TODO: once frr-k8s provides a typed EVPN API, we can use that instead of raw config + if len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 { + rawConfig := generateEVPNRawConfig(selectedNetworks, globalRouterASN, neighbors, vrfASNs) + if rawConfig != "" { + new.Spec.Raw = frrtypes.RawConfig{ + Priority: evpnRawConfigPriority, + Config: rawConfig, + } + } + } + return new, nil } diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index a6b8e8b664..1bad4f1ad5 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -2,6 +2,7 @@ package routeadvertisements import ( "context" + "encoding/json" "fmt" "strings" "sync" @@ -148,11 +149,18 @@ func (tn testNode) Node() *corev1.Node { } } +type testPrefixSelector struct { + Prefix string + LE uint32 + GE uint32 +} + type testNeighbor struct { ASN uint32 Address string DisableMP *bool Advertise []string + Receive []testPrefixSelector } func (tn testNeighbor) Neighbor() frrapi.Neighbor { @@ -170,6 +178,22 @@ func (tn testNeighbor) Neighbor() frrapi.Neighbor { if tn.DisableMP != nil { n.DisableMP = *tn.DisableMP } + if len(tn.Receive) > 0 { + prefixSelectors := make([]frrapi.PrefixSelector, 0, len(tn.Receive)) + for _, ps := range tn.Receive { + prefixSelectors = append(prefixSelectors, frrapi.PrefixSelector{ + Prefix: ps.Prefix, + LE: ps.LE, + GE: ps.GE, + }) + } + n.ToReceive = frrapi.Receive{ + Allowed: frrapi.AllowedInPrefixes{ + Mode: frrapi.AllowRestricted, + Prefixes: prefixSelectors, + }, + } + } return n } @@ -198,14 +222,16 @@ func (tr testRouter) Router() frrapi.Router { } type testFRRConfig struct { - Name string - Namespace string - Generation int - Labels map[string]string - Annotations map[string]string - Routers []*testRouter - NodeSelector map[string]string - OwnUpdate bool + Name string + Namespace string + Generation int + Labels map[string]string + Annotations map[string]string + Routers []*testRouter + NodeSelector map[string]string + OwnUpdate bool + RawConfig string + RawConfigPriority int } func (tf testFRRConfig) FRRConfiguration() *frrapi.FRRConfiguration { @@ -226,6 +252,10 @@ func (tf testFRRConfig) FRRConfiguration() *frrapi.FRRConfiguration { for _, r := range tf.Routers { f.Spec.BGP.Routers = append(f.Spec.BGP.Routers, r.Router()) } + if tf.RawConfig != "" { + f.Spec.Raw.Config = tf.RawConfig + f.Spec.Raw.Priority = tf.RawConfigPriority + } if tf.OwnUpdate { f.ManagedFields = append(f.ManagedFields, metav1.ManagedFieldsEntry{ Manager: fieldManager, @@ -264,15 +294,19 @@ func (te testEIP) EgressIP() *eiptypes.EgressIP { } type testNAD struct { - Name string - Namespace string - Network string - Subnet string - Labels map[string]string - Annotations map[string]string - IsSecondary bool - Topology string - OwnUpdate bool + Name string + Namespace string + Network string + Subnet string + Labels map[string]string + Annotations map[string]string + IsSecondary bool + Topology string + OwnUpdate bool + EVPNMACVRFVNI int32 + EVPNMACVRFRouteTarget string + EVPNIPVRFVNI int32 + EVPNIPVRFRouteTarget string } func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { @@ -295,27 +329,52 @@ func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { ) nad.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ownerRef} } - topology := tn.Topology - switch { - case tn.IsSecondary: - 
nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\", \"topology\": \"%s\", \"netAttachDefName\": \"%s\", \"subnets\": \"%s\"}", - tn.Network, - config.CNI.Plugin, - topology, - tn.Namespace+"/"+tn.Name, - tn.Subnet, - ) - case tn.Topology != "": - nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\", \"topology\": \"%s\", \"netAttachDefName\": \"%s\", \"role\": \"primary\", \"subnets\": \"%s\"}", - tn.Network, - config.CNI.Plugin, - topology, - tn.Namespace+"/"+tn.Name, - tn.Subnet, - ) - default: - nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\"}", tn.Network, config.CNI.Plugin) + + // Build the config as a map to properly marshal EVPN config + cniConfig := map[string]interface{}{ + "cniVersion": "0.4.0", + "name": tn.Network, + "type": config.CNI.Plugin, + "netAttachDefName": tn.Namespace + "/" + tn.Name, + } + + if tn.Topology != "" { + cniConfig["topology"] = tn.Topology + } + if tn.Subnet != "" { + cniConfig["subnets"] = tn.Subnet + } + if tn.Topology != "" && !tn.IsSecondary { + cniConfig["role"] = "primary" + } + + // Add EVPN configuration if present + if tn.EVPNMACVRFVNI > 0 || tn.EVPNIPVRFVNI > 0 { + evpnConfig := map[string]interface{}{} + if tn.EVPNMACVRFVNI > 0 { + macvrf := map[string]interface{}{ + "vni": tn.EVPNMACVRFVNI, + } + if tn.EVPNMACVRFRouteTarget != "" { + macvrf["routeTarget"] = tn.EVPNMACVRFRouteTarget + } + evpnConfig["macVRF"] = macvrf + } + if tn.EVPNIPVRFVNI > 0 { + ipvrf := map[string]interface{}{ + "vni": tn.EVPNIPVRFVNI, + } + if tn.EVPNIPVRFRouteTarget != "" { + ipvrf["routeTarget"] = tn.EVPNIPVRFRouteTarget + } + evpnConfig["ipVRF"] = ipvrf + } + cniConfig["evpn"] = evpnConfig } + + configBytes, _ := json.Marshal(cniConfig) + nad.Spec.Config = string(configBytes) + if tn.OwnUpdate { nad.ManagedFields = append(nad.ManagedFields, metav1.ManagedFieldsEntry{ Manager: fieldManager, @@ -372,6 +431,7 @@ func TestController_reconcile(t *testing.T) { namespaces []*testNamespace eips []*testEIP reconcile string + transport string wantErr bool expectAcceptedStatus metav1.ConditionStatus expectFRRConfigs []*testFRRConfig @@ -781,6 +841,37 @@ func TestController_reconcile(t *testing.T) { reconcile: "ra", expectAcceptedStatus: metav1.ConditionTrue, }, + { + name: "reconciles pod RouteAdvertisement for default network in no-overlay mode with ToReceive routes", + ra: &testRA{Name: "ra", AdvertisePods: true, SelectsDefault: true}, + transport: types.NetworkTransportNoOverlay, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.1.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100", Receive: []testPrefixSelector{{Prefix: "1.2.0.0/16"}}}, + }}, + }, + }, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.0.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.0.0/24"}, Receive: []testPrefixSelector{{Prefix: "1.1.0.0/16", LE: 24, GE: 24}}}, + }}, + }}, + }, + 
expectNADAnnotations: map[string]map[string]string{"default": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, { name: "fails to reconcile a secondary network", ra: &testRA{Name: "ra", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, @@ -942,6 +1033,338 @@ func TestController_reconcile(t *testing.T) { reconcile: "ra", expectAcceptedStatus: metav1.ConditionFalse, }, + { + name: "fails to reconcile EVPN-enabled network to default VRF", + ra: &testRA{Name: "ra", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.1.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "evpn-net", Namespace: "test", Network: util.GenerateCUDNNetworkName("evpn-net"), + Topology: "layer2", Subnet: "1.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "reconciles EVPN MAC-VRF l2 network with a specific target VRF without a VRF router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000, EVPNMACVRFRouteTarget: "65000:1000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! 
+`, + }, + }, + expectNADAnnotations: map[string]map[string]string{"red": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "reconciles EVPN IP-VRF network with auto target and creates a router", + ra: &testRA{Name: "ra", TargetVRF: "auto", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "blue", Namespace: "blue", Network: util.GenerateCUDNNetworkName("blue"), + Topology: "layer3", Subnet: "10.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 2000, EVPNIPVRFRouteTarget: "65000:2000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_blue\":\"10.2.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + Routers: []*testRouter{ + {ASN: 65000, VRF: "blue", Prefixes: []string{"10.2.1.0/24"}}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"blue": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "reconciles EVPN IP-VRF with router ASN from another FRRConfiguration", + ra: &testRA{Name: "ra", TargetVRF: "auto", AdvertisePods: true, SelectsDefault: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfigGlobal", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + { + Name: "frrConfigVRF", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65100, VRF: "blue"}, + }, + }, + }, + nads: []*testNAD{ + {Name: "blue", Namespace: "blue", Network: util.GenerateCUDNNetworkName("blue"), + Topology: "layer3", Subnet: "10.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 2000, EVPNIPVRFRouteTarget: "65000:2000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\",\"cluster_udn_blue\":\"10.2.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigGlobal/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! 
+`, + Routers: []*testRouter{ + {ASN: 65000, Prefixes: []string{"1.1.0.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1", Advertise: []string{"1.1.0.0/24"}}, + }}, + }, + }, + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigVRF/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `vrf blue + vni 2000 +exit-vrf +! +router bgp 65100 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + Routers: []*testRouter{ + {ASN: 65100, VRF: "blue", Prefixes: []string{"10.2.1.0/24"}}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"blue": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "fails to reconcile MACVRF EVPN without global router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "fails to reconcile IPVRF EVPN without global router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "fails to reconcile EVPN with global router but no neighbors", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "reconciles EVPN when global router is in a different FRRConfiguration than VRF router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfigGlobal", + Namespace: frrNamespace, + 
Routers: []*testRouter{ + // Global router with neighbors - provides ASN and neighbors for EVPN + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + { + Name: "frrConfigVRF", + Namespace: frrNamespace, + Routers: []*testRouter{ + // VRF-specific router - matches the target VRF + {ASN: 65000, VRF: "red", Prefixes: []string{"10.1.0.0/16"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000, EVPNMACVRFRouteTarget: "65000:1000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigGlobal/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +`, + }, + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigVRF/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Prefixes: []string{"10.1.0.0/16"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1", Advertise: []string{"10.1.0.0/16"}}, + }}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"red": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -960,9 +1383,11 @@ func TestController_reconcile(t *testing.T) { HostSubnetLength: 64, }, } + config.Default.Transport = tt.transport config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableRouteAdvertisements = true config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EnableEVPN = true fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() addGenerateNameReactor[*frrfake.Clientset](fakeClientset.FRRClient) @@ -1315,6 +1740,7 @@ func TestUpdates(t *testing.T) { config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableRouteAdvertisements = true config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EnableEVPN = true fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() diff --git a/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go new file mode 100644 index 0000000000..7e3fc9bd47 --- /dev/null +++ b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go @@ -0,0 +1,140 @@ +package routeadvertisements + +import ( + "fmt" + "strings" +) + +// generateEVPNRawConfig generates raw FRR configuration for EVPN. +// If asn/neighbors aren't provided the related sections are skipped. 
+//
+// Generated config structure:
+//
+// router bgp <asn>              <- genGlobalEVPNSection
+//  address-family l2vpn evpn
+//   neighbor <ip> activate
+//   advertise-all-vni
+//   vni <vni>                   <- (one per MAC-VRF with RT, section only added when MAC-VRF RT is set)
+//    route-target import <rt>
+//    route-target export <rt>
+//   exit-vni
+//  exit-address-family
+// exit
+// !
+// vrf <vrf>                     <- genVRFVNISection (one per IP-VRF)
+//  vni <vni>
+// exit-vrf
+// !
+// router bgp <asn> vrf <vrf>    <- genVRFEVPNSection (one per IP-VRF)
+//  address-family l2vpn evpn
+//   advertise ipv4 unicast
+//   advertise ipv6 unicast
+//   route-target import <rt>
+//   route-target export <rt>
+//  exit-address-family
+// exit
+// !
+func generateEVPNRawConfig(selected *selectedNetworks, asn uint32, neighbors []string, vrfASNs map[string]uint32) string {
+	var buf strings.Builder
+
+	if asn > 0 && len(neighbors) > 0 {
+		buf.WriteString(genGlobalEVPNSection(asn, neighbors, selected.macVRFConfigs))
+	}
+	for _, cfg := range selected.ipVRFConfigs {
+		buf.WriteString(genVRFVNISection(cfg))
+	}
+	// Generate VRF-specific EVPN sections using each VRF's ASN
+	for _, cfg := range selected.ipVRFConfigs {
+		if vrfASN := vrfASNs[cfg.VRFName]; vrfASN > 0 {
+			buf.WriteString(genVRFEVPNSection(vrfASN, cfg))
+		}
+	}
+	return buf.String()
+}
+
+// genVRFVNISection generates the VRF-to-VNI mapping.
+//
+// vrf <vrf>
+//  vni <vni>
+// exit-vrf
+// !
+func genVRFVNISection(cfg *ipVRFConfig) string {
+	return fmt.Sprintf(`vrf %s
+ vni %d
+exit-vrf
+!
+`, cfg.VRFName, cfg.VNI)
+}
+
+// genGlobalEVPNSection generates the global router's EVPN address-family.
+//
+// router bgp <asn>
+//  address-family l2vpn evpn
+//   neighbor <ip> activate
+//   advertise-all-vni
+//   vni <vni>                   <- (section only added when MAC-VRF RT is set)
+//    route-target import <rt>
+//    route-target export <rt>
+//   exit-vni
+//  exit-address-family
+// exit
+// !
+func genGlobalEVPNSection(asn uint32, neighbors []string, macVRFs []*vrfConfig) string {
+	var buf strings.Builder
+
+	fmt.Fprintf(&buf, "router bgp %d\n", asn)
+	buf.WriteString(" address-family l2vpn evpn\n")
+
+	for _, neighbor := range neighbors {
+		fmt.Fprintf(&buf, "  neighbor %s activate\n", neighbor)
+	}
+	buf.WriteString("  advertise-all-vni\n")
+
+	for _, cfg := range macVRFs {
+		if cfg.RouteTarget == "" {
+			continue
+		}
+		fmt.Fprintf(&buf, "  vni %d\n", cfg.VNI)
+		fmt.Fprintf(&buf, "   route-target import %s\n", cfg.RouteTarget)
+		fmt.Fprintf(&buf, "   route-target export %s\n", cfg.RouteTarget)
+		buf.WriteString("  exit-vni\n")
+	}
+
+	buf.WriteString(" exit-address-family\n")
+	buf.WriteString("exit\n!\n")
+
+	return buf.String()
+}
+
+// genVRFEVPNSection generates a VRF router's EVPN address-family.
+//
+// router bgp 65000 vrf red
+//  address-family l2vpn evpn
+//   advertise ipv4 unicast
+//   advertise ipv6 unicast
+//   route-target import 65000:100
+//   route-target export 65000:100
+//  exit-address-family
+// exit
+// !
+func genVRFEVPNSection(asn uint32, cfg *ipVRFConfig) string { + var buf strings.Builder + fmt.Fprintf(&buf, "router bgp %d vrf %s\n", asn, cfg.VRFName) + buf.WriteString(" address-family l2vpn evpn\n") + + if cfg.HasIPv4 { + buf.WriteString(" advertise ipv4 unicast\n") + } + if cfg.HasIPv6 { + buf.WriteString(" advertise ipv6 unicast\n") + } + if cfg.RouteTarget != "" { + fmt.Fprintf(&buf, " route-target import %s\n", cfg.RouteTarget) + fmt.Fprintf(&buf, " route-target export %s\n", cfg.RouteTarget) + } + + buf.WriteString(" exit-address-family\n") + buf.WriteString("exit\n!\n") + + return buf.String() +} diff --git a/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go new file mode 100644 index 0000000000..1b1460441c --- /dev/null +++ b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go @@ -0,0 +1,183 @@ +package routeadvertisements + +import ( + "testing" +) + +func TestGenerateEVPNRawConfig(t *testing.T) { + tests := []struct { + name string + selected *selectedNetworks + asn uint32 + neighbors []string + want string + }{ + { + name: "MAC-VRF without route target", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000}, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +`, + }, + { + name: "MAC-VRF with route target", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000, RouteTarget: "65000:1000"}, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +`, + }, + { + name: "IP-VRF IPv6", + selected: &selectedNetworks{ + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv6: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv6 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + }, + { + name: "IP-VRF dual stack", + selected: &selectedNetworks{ + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv4: true, + HasIPv6: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + advertise ipv6 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! 
+`, + }, + { + name: "MAC-VRF and IP-VRF combined", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000, RouteTarget: "65000:1000"}, + }, + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv4: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1", "192.168.1.2"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + neighbor 192.168.1.2 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vrfASNs := map[string]uint32{} + for _, cfg := range tt.selected.ipVRFConfigs { + if cfg.VRFName != "" { + vrfASNs[cfg.VRFName] = tt.asn + } + } + got := generateEVPNRawConfig(tt.selected, tt.asn, tt.neighbors, vrfASNs) + if got != tt.want { + t.Errorf("generateEVPNRawConfig() mismatch\nGot:\n%s\nWant:\n%s", got, tt.want) + } + }) + } +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index 68409eba9b..3bd6ca3086 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -18,6 +18,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1" corev1informer "k8s.io/client-go/informers/core/v1" @@ -29,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/notifier" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" @@ -38,14 +40,42 @@ import ( userdefinednetworkscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/scheme" userdefinednetworkinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions/userdefinednetwork/v1" userdefinednetworklister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/listers/userdefinednetwork/v1" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" + vteplister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/listers/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) -const conditionTypeNetworkCreated = "NetworkCreated" +const ( + conditionTypeNetworkCreated = "NetworkCreated" + + // Condition reasons + reasonNADCreated = "NetworkAttachmentDefinitionCreated" + reasonSyncError = "SyncError" + reasonVTEPNotFound = "VTEPNotFound" + reasonNADDeleted = 
"NetworkAttachmentDefinitionDeleted" + reasonNADSyncError = "NetworkAttachmentDefinitionSyncError" + + // MaxEVPNVIDs is the maximum number of VIDs available for EVPN networks (0-4094, but 0 and 1 are reserved). + MaxEVPNVIDs = 4095 + // reservedVIDZeroKey is the key used to reserve VID 0 (reserved per IEEE 802.1Q for priority tagging). + reservedVIDZeroKey = "__vid_zero_reserved__" + // reservedVIDOneKey is the key used to reserve VID 1 (default VLAN on many switches, avoided by convention). + reservedVIDOneKey = "__vid_one_reserved__" +) + +// macVRFKey returns the VID allocator key for a network's MAC-VRF. +func macVRFKey(networkName string) string { + return networkName + "/macvrf" +} + +// ipVRFKey returns the VID allocator key for a network's IP-VRF. +func ipVRFKey(networkName string) string { + return networkName + "/ipvrf" +} -type RenderNetAttachDefManifest func(obj client.Object, targetNamespace string) (*netv1.NetworkAttachmentDefinition, error) +type RenderNetAttachDefManifest func(obj client.Object, targetNamespace string, opts ...template.RenderOption) (*netv1.NetworkAttachmentDefinition, error) type networkInUseError struct { err error @@ -55,6 +85,15 @@ func (n *networkInUseError) Error() string { return n.err.Error() } +// vtepNotFoundError indicates that a required VTEP CR does not exist. +type vtepNotFoundError struct { + vtepName string +} + +func (e *vtepNotFoundError) Error() string { + return fmt.Sprintf("VTEP %q does not exist", e.vtepName) +} + type Controller struct { // cudnController manage ClusterUserDefinedNetwork CRs. cudnController controller.Controller @@ -76,6 +115,10 @@ type Controller struct { networkManager networkmanager.Interface + // vidAllocator allocates cluster-wide VLAN IDs for EVPN networks. + // VIDs are allocated per network name and stored in the NAD config JSON. + vidAllocator id.Allocator + udnClient userdefinednetworkclientset.Interface udnLister userdefinednetworklister.UserDefinedNetworkLister cudnLister userdefinednetworklister.ClusterUserDefinedNetworkLister @@ -83,6 +126,10 @@ type Controller struct { nadLister netv1lister.NetworkAttachmentDefinitionLister podInformer corev1informer.PodInformer namespaceInformer corev1informer.NamespaceInformer + // vtepLister provides read access to VTEP CRs for validating EVPN configuration. + vtepLister vteplister.VTEPLister + // vtepNotifier notifies subscribing controllers about VTEP events. + vtepNotifier *notifier.VTEPNotifier networkInUseRequeueInterval time.Duration eventRecorder record.EventRecorder @@ -98,10 +145,15 @@ func New( networkManager networkmanager.Interface, podInformer corev1informer.PodInformer, namespaceInformer corev1informer.NamespaceInformer, + vtepInformer vtepinformer.VTEPInformer, eventRecorder record.EventRecorder, ) *Controller { udnLister := udnInformer.Lister() cudnLister := cudnInformer.Lister() + + // Allocates VIDs in range 1-4094 (0 is reserved per IEEE 802.1Q). 
+ vidAllocator := id.NewIDAllocator("EVPN-VIDs", MaxEVPNVIDs) + c := &Controller{ nadClient: nadClient, nadLister: nadInfomer.Lister(), @@ -113,6 +165,7 @@ func New( namespaceInformer: namespaceInformer, networkManager: networkManager, namespaceTracker: map[string]sets.Set[string]{}, + vidAllocator: vidAllocator, eventRecorder: eventRecorder, } udnCfg := &controller.ControllerConfig[userdefinednetworkv1.UserDefinedNetwork]{ @@ -138,18 +191,30 @@ func New( c.nadNotifier = notifier.NewNetAttachDefNotifier(nadInfomer, c) c.namespaceNotifier = notifier.NewNamespaceNotifier(namespaceInformer, c) + // Setup EVPN components only when EVPN is enabled. + if util.IsEVPNEnabled() && vtepInformer != nil { + // Setup VTEP watching for EVPN support. + c.vtepLister = vtepInformer.Lister() + c.vtepNotifier = notifier.NewVTEPNotifier(vtepInformer, c) + } + return c } func (c *Controller) Run() error { klog.Infof("Starting user-defined network controllers") - if err := controller.StartWithInitialSync( - c.initializeNamespaceTracker, + + controllers := []controller.Reconciler{ c.cudnController, c.udnController, c.nadNotifier.Controller, c.namespaceNotifier.Controller, - ); err != nil { + } + if c.vtepNotifier != nil { + controllers = append(controllers, c.vtepNotifier.Controller) + } + + if err := controller.StartWithInitialSync(c.initializeController, controllers...); err != nil { return fmt.Errorf("unable to start user-defined network controller: %v", err) } @@ -162,57 +227,233 @@ func (c *Controller) Run() error { return nil } -// initializeNamespaceTracker populates the namespace-tracker with NAD namespaces who owned by the controller. -func (c *Controller) initializeNamespaceTracker() error { - cudns, err := c.cudnLister.List(labels.Everything()) +// initializeController performs all startup initialization before controllers begin processing. +func (c *Controller) initializeController() error { + // Reserve VID 0 and VID 1 to ensure they're never allocated to any network. + // VID 0 is reserved per IEEE 802.1Q standard. + // VID 1 is the default VLAN on many switches and avoided by convention. + if err := c.vidAllocator.ReserveID(reservedVIDZeroKey, 0); err != nil { + return fmt.Errorf("failed to reserve VID 0: %w", err) + } + if err := c.vidAllocator.ReserveID(reservedVIDOneKey, 1); err != nil { + return fmt.Errorf("failed to reserve VID 1: %w", err) + } + + cudnNADs, err := c.buildCUDNToNADs() if err != nil { return err } - if len(cudns) == 0 { + if len(cudnNADs) == 0 { return nil } + c.initializeNamespaceTracker(cudnNADs) + if util.IsEVPNEnabled() { + // Recover VID allocations from existing EVPN CUDNs. + // Recovery failures are logged and the affected CUDNs are enqueued for reconciliation, + // but don't block startup - this prevents a DoS where a malicious NAD could + // crash the entire cluster-manager. + c.recoverEVPNVIDs(cudnNADs) + } + + return nil +} + +// cudnWithNADs pairs a CUDN with its owned NADs. +type cudnWithNADs struct { + cudn *userdefinednetworkv1.ClusterUserDefinedNetwork + nads []netv1.NetworkAttachmentDefinition +} + +// cudnToNADs maps CUDN name to its object and owned NADs. +type cudnToNADs map[string]*cudnWithNADs + +// buildCUDNToNADs builds an index of CUDNs to their owned NADs. 
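+// Ownership is established through each NAD's controller reference, matched
+// against the owning CUDN's UID.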
+// It returns an entry for every existing CUDN, including CUDNs that currently own no NADs +func (c *Controller) buildCUDNToNADs() (cudnToNADs, error) { + cudns, err := c.cudnLister.List(labels.Everything()) + if err != nil { + return nil, err + } + if len(cudns) == 0 { + return nil, nil + } + nads, err := c.nadLister.List(labels.Everything()) if err != nil { - return err + return nil, err } - if len(nads) == 0 { - return nil + + cudnByUID := make(map[types.UID]*userdefinednetworkv1.ClusterUserDefinedNetwork, len(cudns)) + index := make(cudnToNADs, len(cudns)) + for _, cudn := range cudns { + cudnByUID[cudn.UID] = cudn + index[cudn.Name] = &cudnWithNADs{cudn: cudn} } - indexedNADs := map[string]netv1.NetworkAttachmentDefinition{} + for _, nad := range nads { - if nad != nil { - indexedNADs[nad.Namespace+"/"+nad.Name] = *nad.DeepCopy() + if nad == nil { + continue + } + controllerRef := metav1.GetControllerOfNoCopy(nad) + if controllerRef == nil { + continue + } + if cudn, ok := cudnByUID[controllerRef.UID]; ok { + index[cudn.Name].nads = append(index[cudn.Name].nads, *nad.DeepCopy()) } } - for _, cudn := range cudns { - c.namespaceTracker[cudn.Name] = sets.New[string]() + return index, nil +} - for nadKey, nad := range indexedNADs { - if !metav1.IsControlledBy(&nad, cudn) { - continue - } - c.namespaceTracker[cudn.Name].Insert(nad.Namespace) +// initializeNamespaceTracker populates the namespace tracker with NAD namespaces owned by each CUDN. +func (c *Controller) initializeNamespaceTracker(cudnNADs cudnToNADs) { + for cudnName, entry := range cudnNADs { + c.namespaceTracker[cudnName] = sets.New[string]() + for _, nad := range entry.nads { + c.namespaceTracker[cudnName].Insert(nad.Namespace) + } + } +} + +// recoverEVPNVIDs recovers VID allocations from existing EVPN CUDNs using +// NetworkManager's cached NetInfo. NetworkManager has already processed all NADs +// by the time this function is called (it starts before UDN controller). +// +// CUDNs are processed in order of creation timestamp (oldest first) to ensure +// deterministic VID assignment when conflicts occur. If two CUDNs have NADs +// claiming the same VID, the oldest CUDN wins ("first come, first served"). +// CUDN name is used as tie-breaker when timestamps are equal. +// +// If VID recovery fails for a CUDN (e.g., NetworkManager couldn't parse the NAD), +// this logs an error and enqueues the CUDN for reconciliation. +func (c *Controller) recoverEVPNVIDs(cudnNADs cudnToNADs) { + // Extract EVPN CUDNs with NADs into a slice for deterministic ordering. + evpnCUDNs := make([]*cudnWithNADs, 0, len(cudnNADs)) + for _, entry := range cudnNADs { + if entry.cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + continue + } + if len(entry.nads) == 0 { + klog.V(4).Infof("EVPN CUDN %s has no NADs, skipping VID recovery", entry.cudn.Name) + continue + } + evpnCUDNs = append(evpnCUDNs, entry) + } + + // Sort by creation timestamp (oldest first) for deterministic conflict resolution. + // When two CUDNs have conflicting VIDs, the oldest one wins. + // Use name as tie-breaker when timestamps are equal for consistent ordering. 
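+	// Note: metav1 creation timestamps have one-second granularity, so CUDNs
+	// created within the same second tie often; the name comparison keeps the
+	// resulting order deterministic in that case.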
+ slices.SortFunc(evpnCUDNs, func(a, b *cudnWithNADs) int { + if a.cudn.CreationTimestamp.Before(&b.cudn.CreationTimestamp) { + return -1 + } + if b.cudn.CreationTimestamp.Before(&a.cudn.CreationTimestamp) { + return 1 + } + return strings.Compare(a.cudn.Name, b.cudn.Name) + }) - // Usually we don't want to mutate an iterated map, in this case - // the processed entry is removed because it shouldn't be processed - // again and not expected to be visited again, i.e.: the NAD should - // be recorded by the namespaceTracker once. - delete(indexedNADs, nadKey) + for _, entry := range evpnCUDNs { + if err := c.recoverEVPNVIDsForCUDN(entry.cudn.Name); err != nil { + klog.Errorf("VID recovery failed for EVPN CUDN %s: %v. "+ + "The CUDN will be reconciled and existing NAD VIDs will be preserved if possible.", + entry.cudn.Name, err) + c.cudnController.Reconcile(entry.cudn.Name) } } +} + +// recoverEVPNVIDsForCUDN attempts to recover VIDs for a single CUDN using NetworkManager's cache. +// Returns nil if VIDs were successfully recovered or if no VIDs are allocated yet. +// Returns error if VID reservation fails (e.g., conflict with another network). +func (c *Controller) recoverEVPNVIDsForCUDN(cudnName string) error { + networkName := util.GenerateCUDNNetworkName(cudnName) + + // Use NetworkManager's cached NetInfo - it has already parsed the NAD + netInfo := c.networkManager.GetNetwork(networkName) + if netInfo == nil { + // NetworkManager doesn't have this network cached. This can happen if: + // - NetworkManager failed to parse the NAD (corrupted) + // - NAD doesn't exist yet + return fmt.Errorf("network %s not found in NetworkManager cache", networkName) + } + + macVRFVID := netInfo.EVPNMACVRFVID() + ipVRFVID := netInfo.EVPNIPVRFVID() + + // Check if this network has EVPN VIDs allocated + if macVRFVID == 0 && ipVRFVID == 0 { + klog.V(4).Infof("EVPN CUDN %s has no VIDs allocated yet, skipping recovery", cudnName) + return nil // No VIDs to recover + } + if err := c.reserveRecoveredVIDs(cudnName, macVRFVID, ipVRFVID); err != nil { + return fmt.Errorf("failed to reserve VIDs for cudn %s: %w", cudnName, err) + } + + klog.V(4).Infof("Recovered VIDs for CUDN %s (macVRF=%d, ipVRF=%d)", cudnName, macVRFVID, ipVRFVID) return nil } +// reserveRecoveredVIDs reserves the given VIDs in the allocator for a network. +// VIDs of 0 are skipped (not allocated). +// +// Both VIDs are attempted even if one fails - this maximizes recovery and protects +// as many VIDs as possible. We don't release successfully reserved VIDs on partial +// failure because they represent state that already exists in NADs; releasing them +// could allow another network to "steal" the VID, causing route leakage. +func (c *Controller) reserveRecoveredVIDs(networkName string, macVRFVID, ipVRFVID int) error { + var errs []error + + if macVRFVID > 0 { + if err := c.vidAllocator.ReserveID(macVRFKey(networkName), macVRFVID); err != nil { + errs = append(errs, fmt.Errorf("failed to reserve VID %d for MAC-VRF of network %s: %w", macVRFVID, networkName, err)) + } else { + klog.V(4).Infof("Recovered VID %d for MAC-VRF of network %s", macVRFVID, networkName) + } + } + if ipVRFVID > 0 { + if err := c.vidAllocator.ReserveID(ipVRFKey(networkName), ipVRFVID); err != nil { + errs = append(errs, fmt.Errorf("failed to reserve VID %d for IP-VRF of network %s: %w", ipVRFVID, networkName, err)) + } else { + klog.V(4).Infof("Recovered VID %d for IP-VRF of network %s", ipVRFVID, networkName) + } + } + + return errors.Join(errs...) 
+} + +// releaseVIDForNetwork releases the VIDs allocated for a network's VRFs. +// +// NOTE: VID release is not synchronized with node-side dataplane cleanup. +// In theory, a rapidly created new network could get the same VID while nodes +// are still tearing down the old network's bridge configuration. In practice, +// VID collisions are unlikely because the allocator is monotonic and won't +// reallocate the same VID unless the pool fills up or CUDNs are recycled rapidly. +// The actual mitigation is on the node-side: nodes should check for VID conflicts +// and refuse to configure a VID already in use by a different network, waiting +// until the old network is cleaned up. +func (c *Controller) releaseVIDForNetwork(networkName string) { + macVID := c.vidAllocator.ReleaseID(macVRFKey(networkName)) + ipVID := c.vidAllocator.ReleaseID(ipVRFKey(networkName)) + if macVID >= 0 || ipVID >= 0 { + klog.V(4).Infof("Released VIDs for network %s: MAC-VRF=%d, IP-VRF=%d", networkName, macVID, ipVID) + } +} + func (c *Controller) Shutdown() { - controller.Stop( + controllers := []controller.Reconciler{ c.cudnController, c.udnController, c.nadNotifier.Controller, c.namespaceNotifier.Controller, - ) + } + if c.vtepNotifier != nil { + controllers = append(controllers, c.vtepNotifier.Controller) + } + controller.Stop(controllers...) } // ReconcileNetAttachDef enqueue NAD requests following NAD events. @@ -263,14 +504,14 @@ func (c *Controller) ReconcileNetAttachDef(key string) error { // ReconcileNamespace enqueue relevant Cluster UDN CR requests following namespace events. func (c *Controller) ReconcileNamespace(key string) error { namespace, err := c.namespaceInformer.Lister().Get(key) - if err != nil { - // Ignore removed namespaces - if apierrors.IsNotFound(err) { - return nil - } + if err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("failed to get namespace %q from cache: %w", key, err) } - namespaceLabels := labels.Set(namespace.Labels) + + var namespaceLabels labels.Set + if namespace != nil { + namespaceLabels = namespace.Labels + } c.namespaceTrackerLock.RLock() defer c.namespaceTrackerLock.RUnlock() @@ -278,12 +519,20 @@ func (c *Controller) ReconcileNamespace(key string) error { for cudnName, affectedNamespaces := range c.namespaceTracker { affectedNamespace := affectedNamespaces.Has(key) - selectedNamespace := false + // For deleted namespaces, only reconcile if tracked + if namespace == nil { + if affectedNamespace { + klog.Errorf("BUG: namespace %q was deleted but still tracked by ClusterUDN %q, forcing reconcile to cleanup", key, cudnName) + c.cudnController.Reconcile(cudnName) + } + continue + } + selectedNamespace := false if !affectedNamespace { cudn, err := c.cudnLister.Get(cudnName) if err != nil { - return fmt.Errorf("faild to get CUDN %q from cache: %w", cudnName, err) + return fmt.Errorf("failed to get CUDN %q from cache: %w", cudnName, err) } cudnSelector, err := metav1.LabelSelectorAsSelector(&cudn.Spec.NamespaceSelector) if err != nil { @@ -488,19 +737,19 @@ func newNetworkCreatedCondition(nad *netv1.NetworkAttachmentDefinition, syncErro networkCreatedCondition := &metav1.Condition{ Type: conditionTypeNetworkCreated, Status: metav1.ConditionTrue, - Reason: "NetworkAttachmentDefinitionCreated", + Reason: reasonNADCreated, Message: "NetworkAttachmentDefinition has been created", LastTransitionTime: now, } if nad != nil && !nad.DeletionTimestamp.IsZero() { networkCreatedCondition.Status = metav1.ConditionFalse - networkCreatedCondition.Reason = 
"NetworkAttachmentDefinitionDeleted" + networkCreatedCondition.Reason = reasonNADDeleted networkCreatedCondition.Message = "NetworkAttachmentDefinition is being deleted" } if syncError != nil { networkCreatedCondition.Status = metav1.ConditionFalse - networkCreatedCondition.Reason = "SyncError" + networkCreatedCondition.Reason = reasonSyncError networkCreatedCondition.Message = syncError.Error() } @@ -511,8 +760,8 @@ func (c *Controller) cudnNeedUpdate(_ *userdefinednetworkv1.ClusterUserDefinedNe return true } -// reconcileUDN get ClusterUserDefinedNetwork CR key and reconcile it according to spec. -// It creates NADs according to spec at the spesified selected namespaces. +// reconcileCUDN get ClusterUserDefinedNetwork CR key and reconcile it according to spec. +// It creates NADs according to spec at the specified selected namespaces. // The NAD objects are created with the same key as the request CR, having both kinds have the same key enable // the controller to act on NAD changes as well and reconciles NAD objects (e.g: in case NAD is deleted it will be re-created). func (c *Controller) reconcileCUDN(key string) error { @@ -539,6 +788,14 @@ func (c *Controller) reconcileCUDN(key string) error { return updateStatusErr } + // vtepNotFoundError is non-fatal: the status has been updated to reflect + // the missing VTEP, and the VTEPNotifier will re-queue this CUDN when + // the VTEP is created. No need to return an error that would cause retries. + var vtepNotFound *vtepNotFoundError + if errors.As(syncErr, &vtepNotFound) { + return updateStatusErr + } + return errors.Join(syncErr, updateStatusErr) } @@ -596,6 +853,7 @@ func (c *Controller) syncClusterUDN(cudn *userdefinednetworkv1.ClusterUserDefine delete(c.namespaceTracker, cudnName) metrics.DecrementCUDNCount(role, topology) metrics.DeleteDynamicUDNNodeCount(util.GenerateCUDNNetworkName(cudn.Name)) + c.releaseVIDForNetwork(cudnName) } return nil, nil @@ -616,6 +874,10 @@ func (c *Controller) syncClusterUDN(cudn *userdefinednetworkv1.ClusterUserDefine metrics.IncrementCUDNCount(role, topology) } + if err := c.validateEVPNVTEP(cudn); err != nil { + return nil, err + } + selectedNamespaces, err := c.getSelectedNamespaces(cudn.Spec.NamespaceSelector) if err != nil { return nil, fmt.Errorf("failed to get selected namespaces: %w", err) @@ -658,6 +920,10 @@ func (c *Controller) getSelectedNamespaces(sel metav1.LabelSelector) (sets.Set[s return nil, fmt.Errorf("failed to list namespaces: %w", err) } for _, selectedNs := range selectedNamespacesList { + if !selectedNs.DeletionTimestamp.IsZero() { + klog.V(5).Infof("Namespace %s is being deleted, skipping", selectedNs.Name) + continue + } selectedNamespaces.Insert(selectedNs.Name) } return selectedNamespaces, nil @@ -673,7 +939,7 @@ func (c *Controller) updateClusterUDNStatus(cudn *userdefinednetworkv1.ClusterUs return strings.Compare(a.Namespace, b.Namespace) }) - networkCreatedCondition := newClusterNetworCreatedCondition(nads, syncError) + networkCreatedCondition := newClusterNetworkCreatedCondition(nads, syncError) updated := meta.SetStatusCondition(&cudn.Status.Conditions, networkCreatedCondition) if !updated { @@ -707,7 +973,7 @@ func (c *Controller) updateClusterUDNStatus(cudn *userdefinednetworkv1.ClusterUs return nil } -func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, syncError error) metav1.Condition { +func newClusterNetworkCreatedCondition(nads []netv1.NetworkAttachmentDefinition, syncError error) metav1.Condition { var namespaces []string for _, nad := 
range nads { namespaces = append(namespaces, nad.Namespace) @@ -718,7 +984,7 @@ func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, condition := metav1.Condition{ Type: conditionTypeNetworkCreated, Status: metav1.ConditionTrue, - Reason: "NetworkAttachmentDefinitionCreated", + Reason: reasonNADCreated, Message: fmt.Sprintf("NetworkAttachmentDefinition has been created in following namespaces: [%s]", affectedNamespaces), LastTransitionTime: now, } @@ -731,15 +997,80 @@ func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, } if len(deletedNadKeys) > 0 { condition.Status = metav1.ConditionFalse - condition.Reason = "NetworkAttachmentDefinitionDeleted" + condition.Reason = reasonNADDeleted condition.Message = fmt.Sprintf("NetworkAttachmentDefinition are being deleted: %v", deletedNadKeys) } if syncError != nil { condition.Status = metav1.ConditionFalse - condition.Reason = "NetworkAttachmentDefinitionSyncError" - condition.Message = syncError.Error() + + // Check for specific error types to provide better status reasons + var vtepNotFound *vtepNotFoundError + if errors.As(syncError, &vtepNotFound) { + condition.Reason = reasonVTEPNotFound + condition.Message = fmt.Sprintf("Cannot create network: VTEP '%s' does not exist. "+ + "Create the VTEP CR first or update the CUDN to reference an existing VTEP.", + vtepNotFound.vtepName) + } else { + condition.Reason = reasonNADSyncError + condition.Message = syncError.Error() + } } return condition } + +// validateEVPNVTEP validates EVPN configuration for a CUDN. +// Returns an error if EVPN is requested but disabled, or if the referenced VTEP doesn't exist. +func (c *Controller) validateEVPNVTEP(cudn *userdefinednetworkv1.ClusterUserDefinedNetwork) error { + if cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + return nil // Not an EVPN network + } + + if !util.IsEVPNEnabled() { + return fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + + // CEL validation ensures EVPN is set when transport is EVPN. + vtepName := cudn.Spec.Network.EVPN.VTEP + _, err := c.vtepLister.Get(vtepName) + if err != nil { + if apierrors.IsNotFound(err) { + return &vtepNotFoundError{vtepName: vtepName} + } + return fmt.Errorf("failed to get VTEP %q: %w", vtepName, err) + } + + return nil +} + +// ReconcileVTEP handles VTEP events by re-queuing all CUDNs that reference the VTEP. +// +// This uses O(n) iteration over all CUDNs rather than maintaining an index because: +// VTEP create/delete events are expected to be rare; scanning all CUDNs from the +// informer cache keeps the logic simple. If this becomes a hot path at large +// CUDN counts, add an informer indexer keyed by VTEP. +func (c *Controller) ReconcileVTEP(vtepName string) error { + cudns, err := c.cudnLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list CUDNs: %w", err) + } + + for _, cudn := range cudns { + if cudnReferencesVTEP(cudn, vtepName) { + klog.V(4).InfoS("Re-queueing CUDN following VTEP event", "cudn", cudn.Name, "vtep", vtepName) + c.cudnController.Reconcile(cudn.Name) + } + } + + return nil +} + +// cudnReferencesVTEP returns true if the CUDN is an EVPN network referencing the given VTEP. +// CEL validation ensures EVPN is set when transport is EVPN. 
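+// That invariant is relied on below: Spec.Network.EVPN is dereferenced without
+// a nil check, so an object that somehow bypassed CEL validation would panic here.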
+func cudnReferencesVTEP(cudn *userdefinednetworkv1.ClusterUserDefinedNetwork, vtepName string) bool { + if cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + return false + } + return cudn.Spec.Network.EVPN.VTEP == vtepName +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go index 735b0afea2..127552ac93 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go @@ -16,6 +16,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" @@ -35,16 +36,22 @@ func (c *Controller) updateNAD(obj client.Object, namespace string) (*netv1.Netw } } - desiredNAD, err := c.renderNadFn(obj, namespace) + existingNAD, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(obj.GetName()) + if err != nil && !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("failed to get NetworkAttachmentDefinition %s/%s from cache: %v", namespace, obj.GetName(), err) + } + + renderOpts, err := c.allocateEVPNVIDsIfNeeded(obj) if err != nil { - return nil, fmt.Errorf("failed to generate NetworkAttachmentDefinition: %w", err) + return nil, fmt.Errorf("failed to allocate EVPN VIDs: %w", err) } - nad, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(obj.GetName()) - if err != nil && !apierrors.IsNotFound(err) { - return nil, fmt.Errorf("failed to get NetworkAttachmentDefinition %s/%s from cache: %v", namespace, obj.GetName(), err) + desiredNAD, err := c.renderNadFn(obj, namespace, renderOpts...) + if err != nil { + return nil, fmt.Errorf("failed to generate NetworkAttachmentDefinition: %w", err) } - nadCopy := nad.DeepCopy() + + nadCopy := existingNAD.DeepCopy() if nadCopy == nil { // creating NAD in case no primary network exist should be atomic and synchronized with @@ -119,7 +126,7 @@ func (c *Controller) deleteNAD(obj client.Object, namespace string) error { pods, err := c.podInformer.Lister().Pods(nadCopy.Namespace).List(labels.Everything()) if err != nil { - return fmt.Errorf("failed to list pods at target namesapce %q: %w", nadCopy.Namespace, err) + return fmt.Errorf("failed to list pods at target namespace %q: %w", nadCopy.Namespace, err) } // This is best-effort check no pod using the subject NAD, // noting prevent a from being pod creation right after this check. @@ -142,3 +149,55 @@ func (c *Controller) deleteNAD(obj client.Object, namespace string) error { return nil } + +// allocateEVPNVIDsIfNeeded checks if the object is an EVPN network and allocates VIDs if needed. +// Returns render options containing the allocated VIDs, or empty options for non-EVPN networks. +// Returns an error if EVPN transport is requested but the feature flag is disabled. +// +// This function relies on the idempotency of AllocateID: if a VID was already allocated for a key +// (either during recovery or a previous reconciliation), AllocateID returns the same VID. +// This means VIDs are stable across reconciliations without needing to parse the existing NAD. 
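+//
+// A sketch of the allocator contract assumed here:
+//
+//	vidA, _ := c.vidAllocator.AllocateID("net1/macvrf") // first call allocates a free VID
+//	vidB, _ := c.vidAllocator.AllocateID("net1/macvrf") // repeat call returns the same VID
+//	// vidA == vidB until ReleaseID("net1/macvrf") frees it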
+func (c *Controller) allocateEVPNVIDsIfNeeded(obj client.Object) ([]template.RenderOption, error) { + spec := template.GetSpec(obj) + if spec.GetTransport() != userdefinednetworkv1.TransportOptionEVPN { + return nil, nil + } + + // EVPN transport is requested - ensure the feature is enabled. + if !util.IsEVPNEnabled() { + return nil, fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + + evpnCfg := spec.GetEVPN() + if evpnCfg == nil { + return nil, nil + } + + networkName := obj.GetName() + var macVRFVID, ipVRFVID int + + // Allocate VID for MAC-VRF if present + if evpnCfg.MACVRF != nil { + vid, err := c.vidAllocator.AllocateID(macVRFKey(networkName)) + if err != nil { + return nil, fmt.Errorf("failed to allocate VID for MAC-VRF: %w", err) + } + macVRFVID = vid + klog.V(4).InfoS("Allocated VID for MAC-VRF", "network", networkName, "vid", vid) + } + + // Allocate VID for IP-VRF if present + if evpnCfg.IPVRF != nil { + vid, err := c.vidAllocator.AllocateID(ipVRFKey(networkName)) + if err != nil { + return nil, fmt.Errorf("failed to allocate VID for IP-VRF: %w", err) + } + ipVRFVID = vid + klog.V(4).InfoS("Allocated VID for IP-VRF", "network", networkName, "vid", vid) + } + + // Return render options with allocated VIDs. + // Note: API validation ensures at least one of macVRF or ipVRF is specified, + // so at least one VID will be allocated if we reach here. + return []template.RenderOption{template.WithEVPNVIDs(macVRFVID, ipVRFVID)}, nil +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 2a07e96dbe..b609859daa 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -2,6 +2,7 @@ package userdefinednetwork import ( "context" + "encoding/json" "errors" "fmt" "strings" @@ -22,10 +23,13 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" udnclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" udnfakeclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -39,6 +43,7 @@ var _ = Describe("User Defined Network Controller", func() { var ( cs *util.OVNClusterManagerClientset f *factory.WatchFactory + nm networkmanager.Controller ) BeforeEach(func() { @@ -46,9 +51,16 @@ var _ = Describe("User Defined Network Controller", func() { Expect(config.PrepareTestConfig()).To(Succeed()) config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableNetworkSegmentation = true + // Enable EVPN for EVPN-related tests + config.OVNKubernetesFeature.EnableRouteAdvertisements = true + 
config.OVNKubernetesFeature.EnableEVPN = true }) AfterEach(func() { + if nm != nil { + nm.Stop() + nm = nil + } if f != nil { f.Shutdown() } @@ -65,7 +77,30 @@ var _ = Describe("User Defined Network Controller", func() { Expect(err).NotTo(HaveOccurred()) return New(cs.NetworkAttchDefClient, f.NADInformer(), cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(), - renderNADStub, networkManager.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), nil, + renderNADStub, networkManager.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), f.VTEPInformer(), nil, + ) + } + + // newTestControllerWithNetworkManager creates a controller with a started NetworkManager. + newTestControllerWithNetworkManager := func(renderNADStub RenderNetAttachDefManifest, objects ...runtime.Object) *Controller { + cs = util.GetOVNClientset(objects...).GetClusterManagerClientset() + var err error + f, err = factory.NewClusterManagerWatchFactory(cs) + Expect(err).NotTo(HaveOccurred()) + Expect(f.Start()).To(Succeed()) + + nm, err = networkmanager.NewForCluster(&networkmanager.FakeControllerManager{}, f, cs, nil, id.NewTunnelKeyAllocator("TunnelKeys")) + Expect(err).NotTo(HaveOccurred()) + // Start NetworkManager - it will process existing NADs and cache their VIDs + Expect(nm.Start()).To(Succeed()) + + var vtepInformer vtepinformer.VTEPInformer + if util.IsEVPNEnabled() { + vtepInformer = f.VTEPInformer() + } + return New(cs.NetworkAttchDefClient, f.NADInformer(), + cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(), + renderNADStub, nm.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), vtepInformer, nil, ) } @@ -445,6 +480,827 @@ var _ = Describe("User Defined Network Controller", func() { } }) + It("should allocate VID for EVPN network NAD", func() { + testNs := testNamespace("evpn-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-test]", + }})) + + // Verify VID was allocated in the NAD config + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be allocated for EVPN MAC-VRF (first available after 0,1 reserved)") + }).Should(Succeed()) + }) + + It("should allocate VID for EVPN network NAD with IP-VRF only", func() { + testNs := testNamespace("evpn-ipvrf-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNIPVRFClusterUDN("evpn-ipvrf-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = 
cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-ipvrf-test]", + }})) + + // Verify VID was allocated in the NAD config (IP-VRF only, no MAC-VRF) + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(0), "MAC-VRF should not be present for IP-VRF only config") + g.Expect(ipVID).To(Equal(2), "VID should be allocated for EVPN IP-VRF only (first available after 0,1 reserved)") + }).Should(Succeed()) + }) + + It("should allocate separate VIDs for EVPN network with both MAC-VRF and IP-VRF (symmetric IRB)", func() { + testNs := testNamespace("evpn-irb-test") + vtep := testVTEP("vtep-test") + cudn := testSymmetricIRBClusterUDN("evpn-irb-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-irb-test]", + }})) + + // Verify both VIDs were allocated with different values + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "MAC-VRF should get VID 2 (first available)") + g.Expect(ipVID).To(Equal(3), "IP-VRF should get VID 3") + }).Should(Succeed()) + }) + + It("should allocate different VIDs for multiple EVPN networks", func() { + testNs := testNamespace("evpn-multi-test") + vtep := testVTEP("vtep-test") + cudn1 := testEVPNClusterUDN("evpn-cudn-1", vtep.Name, testNs.Name) + cudn2 := testEVPNClusterUDN("evpn-cudn-2", vtep.Name, testNs.Name) + cudn2.UID = "2" // Different UID for second CUDN + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn1, cudn2, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for both NADs to be created and have VIDs, and verify they are different + Eventually(func(g Gomega) { + nad1, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), "evpn-cudn-1", metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + nad2, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), "evpn-cudn-2", metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + vid1, _ := evpnVIDsFromNAD(nad1) + vid2, _ := evpnVIDsFromNAD(nad2) + g.Expect(vid1).To(BeNumerically(">", 0), "NAD 1 
should have VID allocated") + g.Expect(vid2).To(BeNumerically(">", 0), "NAD 2 should have VID allocated") + // VIDs should be different from each other + // Note: Order is non-deterministic due to concurrent CUDN processing + g.Expect(vid1).NotTo(Equal(vid2), "VIDs should be different for different networks") + }).Should(Succeed()) + }) + + It("should release VID when EVPN CUDN is deleted", func() { + testNs := testNamespace("evpn-delete-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-delete-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for CUDN to be processed and NAD created with VID + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "First CUDN should get VID 2 (first available)") + }).Should(Succeed()) + + // Verify VID is allocated in the controller's allocator + Expect(c.vidAllocator.GetID("evpn-delete-cudn/macvrf")).To(BeNumerically(">=", 0), "VID should be allocated") + + // Trigger deletion by setting DeletionTimestamp and processing + now := metav1.Now() + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + cudn.DeletionTimestamp = &now + _, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Wait for finalizer to be removed (indicating deletion was processed) + Eventually(func(g Gomega) { + updatedCUDN, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updatedCUDN.Finalizers).To(BeEmpty(), "Finalizer should be removed after deletion") + // Verify VID is released from the allocator + g.Expect(c.vidAllocator.GetID("evpn-delete-cudn/macvrf")).To(Equal(-1), "VID should be released after deletion") + }).Should(Succeed()) + }) + + It("should release both MAC-VRF and IP-VRF VIDs when symmetric IRB CUDN is deleted", func() { + testNs := testNamespace("evpn-irb-delete-test") + vtep := testVTEP("vtep-irb-delete") + cudn := testSymmetricIRBClusterUDN("evpn-irb-delete", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for CUDN to be processed and NAD created with both VIDs + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "MAC-VRF VID should be allocated (first available)") + g.Expect(ipVID).To(Equal(3), "IP-VRF VID should be allocated") + }).Should(Succeed()) + + // Verify both VIDs are allocated in the controller's allocator + Expect(c.vidAllocator.GetID("evpn-irb-delete/macvrf")).To(Equal(2), "MAC-VRF VID should be allocated (first available)") + Expect(c.vidAllocator.GetID("evpn-irb-delete/ipvrf")).To(Equal(3), "IP-VRF VID should be allocated") + + // Trigger deletion + now 
:= metav1.Now() + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + cudn.DeletionTimestamp = &now + _, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Wait for finalizer to be removed and verify both VIDs are released + Eventually(func(g Gomega) { + updatedCUDN, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updatedCUDN.Finalizers).To(BeEmpty(), "Finalizer should be removed after deletion") + // Verify both VIDs are released from the allocator + g.Expect(c.vidAllocator.GetID("evpn-irb-delete/macvrf")).To(Equal(-1), "MAC-VRF VID should be released after deletion") + g.Expect(c.vidAllocator.GetID("evpn-irb-delete/ipvrf")).To(Equal(-1), "IP-VRF VID should be released after deletion") + }).Should(Succeed()) + }) + + It("should preserve allocated VID when EVPN CUDN is updated", func() { + testNs := testNamespace("evpn-update-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-update-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for initial VID allocation + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be allocated (first available)") + }).Should(Succeed()) + + // Update CUDN (trigger reconciliation) + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + cudn.Annotations = map[string]string{"updated": "true"} + _, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Ensure VID remains the same after reconciliation + Consistently(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should remain consistent after CUDN update") + }, 500*time.Millisecond, 50*time.Millisecond).Should(Succeed()) + }) + + It("should continue startup and allocate new VID when all NADs are corrupted", func() { + // VID recovery failures no longer block startup to prevent DoS attacks + // via malicious NADs. Instead, the CUDN is enqueued for reconciliation + // and a new VID is allocated. 
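+		// The scenario below assumes NetworkManager drops a NAD it cannot parse
+		// from its cache, so recoverEVPNVIDsForCUDN fails with a "not found in
+		// NetworkManager cache" error for this CUDN.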
+ testNs := testNamespace("evpn-all-corrupted-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-all-corrupted", vtep.Name, testNs.Name) + + // Create a corrupted NAD owned by the CUDN - NetworkManager will fail to parse it + corruptedNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 0, 0) + corruptedNAD.Spec.Config = `{"transport":"evpn", invalid json - corrupted` + + // Use started NetworkManager - it will fail to parse the corrupted NAD + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, corruptedNAD) + + // Controller should start successfully (VID recovery failure logged but not fatal) + Expect(c.Run()).To(Succeed()) + + // The CUDN is enqueued for reconciliation and gets a new VID + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "Should allocate new VID since recovery failed (first available)") + }).Should(Succeed()) + }) + + It("should continue startup and allocate new VID when VID recovery encounters a conflict", func() { + // VID conflicts during recovery no longer block startup. + // Instead, the CUDN is enqueued for reconciliation and gets a new VID. + testNs := testNamespace("evpn-vid-conflict-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-conflict", vtep.Name, testNs.Name) + + // Create a NAD with VID 5 for MAC-VRF + existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 5, 0) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD) + + // Pre-reserve VID 5 for a DIFFERENT key to create a conflict during recovery + Expect(c.vidAllocator.ReserveID("conflicting-network/macvrf", 5)).To(Succeed()) + + // Controller should start successfully despite the conflict + Expect(c.Run()).To(Succeed()) + + // Recovery fails due to conflict, CUDN is enqueued for reconciliation and gets a new VID + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "Should allocate new VID since 5 is taken by another network (first available)") + }).Should(Succeed()) + }) + + It("should continue startup and preserve MAC-VRF VID when only IP-VRF VID recovery encounters a conflict", func() { + // When IP-VRF VID conflicts but MAC-VRF VID is available: + // - MAC-VRF recovery succeeds (VID reserved in allocator) + // - IP-VRF recovery fails (conflict) + // - CUDN is enqueued for reconciliation + // - MAC-VRF VID is preserved (already in allocator), IP-VRF gets new VID + testNs := testNamespace("evpn-ipvrf-conflict-test") + vtep := testVTEP("vtep-test") + cudn := testSymmetricIRBClusterUDN("evpn-ipvrf-conflict", vtep.Name, testNs.Name) + + // Create a symmetric IRB NAD with both MAC-VRF (VID 3) and IP-VRF (VID 7) + existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 3, 7) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD) + + // Pre-reserve VID 7 for IP-VRF of a DIFFERENT network to create a conflict + Expect(c.vidAllocator.ReserveID("other-network/ipvrf", 
7)).To(Succeed()) + + // Controller should start successfully + Expect(c.Run()).To(Succeed()) + + // MAC-VRF VID 3 was successfully reserved during recovery. + // IP-VRF VID 7 conflicted, so during reconciliation it gets new VID 2 (first available). + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(3), "MAC-VRF VID should be preserved (recovery succeeded)") + g.Expect(ipVID).To(Equal(2), "IP-VRF gets new VID (first available, 0,1 reserved, 7 is taken)") + }).Should(Succeed()) + }) + + It("should continue startup and preserve IP-VRF VID when only MAC-VRF VID recovery encounters a conflict", func() { + // When MAC-VRF VID conflicts but IP-VRF VID is available: + // - MAC-VRF recovery fails (conflict) + // - IP-VRF recovery succeeds (VID reserved in allocator) + // - CUDN is enqueued for reconciliation + // - MAC-VRF gets new VID, IP-VRF VID is preserved + testNs := testNamespace("evpn-macvrf-conflict-test") + vtep := testVTEP("vtep-test") + cudn := testSymmetricIRBClusterUDN("evpn-macvrf-conflict", vtep.Name, testNs.Name) + + // Create a symmetric IRB NAD with both MAC-VRF (VID 3) and IP-VRF (VID 7) + existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 3, 7) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD) + + // Pre-reserve VID 3 for a DIFFERENT network to create a conflict during recovery + Expect(c.vidAllocator.ReserveID("other-network/macvrf", 3)).To(Succeed()) + + // Controller should start successfully + Expect(c.Run()).To(Succeed()) + + // IP-VRF VID 7 was successfully reserved during recovery. + // MAC-VRF VID 3 conflicted, so during reconciliation it gets new VID 2 (first available). + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "MAC-VRF gets new VID (first available, 0,1 reserved, 3 is already taken)") + g.Expect(ipVID).To(Equal(7), "IP-VRF VID should be preserved (recovery succeeded)") + }).Should(Succeed()) + }) + + It("should not fail startup when CUDN exists but has no NADs yet", func() { + vtep := testVTEP("vtep-test") + // Create a CUDN without any NADs (namespace doesn't match selector) + cudnWithNoNADs := testEVPNClusterUDN("evpn-no-nads", vtep.Name, "nonexistent-ns") + + c = newTestControllerWithNetworkManager(renderNadStub(nil), cudnWithNoNADs, vtep) + + Expect(c.Run()).To(Succeed(), "Controller should start even when CUDN has no NADs") + + // No VID should be allocated since there are no NADs + Expect(c.vidAllocator.GetID("evpn-no-nads/macvrf")).To(Equal(-1), "No VID should be allocated for CUDN without NADs") + }) + + It("should recover VIDs from NetworkManager cache at startup", func() { + // This tests the production startup recovery path where: + // 1. NetworkManager is started and processes existing NADs + // 2. 
UDN controller starts and recovers VIDs from NetworkManager's cache + testNs := testNamespace("evpn-nm-recovery-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-nm-recovery", vtep.Name, testNs.Name) + + // Create an existing NAD with VID 42 (simulating a previous controller run) + existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 42, 0) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD) + Expect(c.Run()).To(Succeed()) + + // VID should be recovered from NetworkManager cache at startup + Eventually(func() int { + return c.vidAllocator.GetID("evpn-nm-recovery/macvrf") + }).Should(Equal(42), "VID 42 should be recovered from NetworkManager cache at startup") + }) + + It("should recover VIDs in deterministic order based on CUDN creation timestamp", func() { + // When two CUDNs have NADs claiming the same VID, the older CUDN wins. + // This ensures deterministic behavior across restarts. + testNs1 := testNamespace("evpn-order-test-1") + testNs2 := testNamespace("evpn-order-test-2") + vtep := testVTEP("vtep-test") + + // Create two CUDNs with different creation timestamps and unique UIDs + olderCUDN := testEVPNClusterUDN("aaa-older-cudn", vtep.Name, testNs1.Name) + olderCUDN.UID = "older-uid-1" + olderCUDN.CreationTimestamp = metav1.NewTime(time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)) + + newerCUDN := testEVPNClusterUDN("zzz-newer-cudn", vtep.Name, testNs2.Name) + newerCUDN.UID = "newer-uid-2" + newerCUDN.CreationTimestamp = metav1.NewTime(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC)) + + // Both NADs claim VID 42 - this simulates a conflict scenario + olderNAD := testEVPNClusterUdnNADOwnedByCUDN(olderCUDN, testNs1.Name, vtep.Name, 42, 0) + newerNAD := testEVPNClusterUdnNADOwnedByCUDN(newerCUDN, testNs2.Name, vtep.Name, 42, 0) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, + olderCUDN, newerCUDN, testNs1, testNs2, vtep, olderNAD, newerNAD) + Expect(c.Run()).To(Succeed()) + + // The older CUDN should win the VID 42, regardless of alphabetical name order + // (newerCUDN has name "zzz-newer-cudn" which comes after "aaa-older-cudn" alphabetically, + // but olderCUDN should still win because it was created first) + Eventually(func() int { + return c.vidAllocator.GetID("aaa-older-cudn/macvrf") + }).Should(Equal(42), "Older CUDN should keep VID 42") + + // The newer CUDN loses the conflict and gets a new VID during reconciliation + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs2.Name).Get(context.Background(), newerCUDN.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "Newer CUDN should get new VID (first available) since older CUDN won VID 42") + }).Should(Succeed()) + }) + + It("should return error when VID pool is exhausted", func() { + testNs := testNamespace("evpn-exhaustion-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-exhaust-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + + // Exhaust all available VIDs (2-4094) before starting the controller (0,1 already reserved) + for i := 2; i < MaxEVPNVIDs; i++ { + err := c.vidAllocator.ReserveID(fmt.Sprintf("exhaust-key-%d", i), i) + Expect(err).NotTo(HaveOccurred(), "should allocate VID %d", i) + } + + // Now start the controller - the EVPN 
CUDN should fail to get a VID + Expect(c.Run()).To(Succeed()) + + // Verify the pool is exhausted + _, err := c.vidAllocator.AllocateID("one-more-key") + Expect(err).To(HaveOccurred(), "VID pool should be exhausted") + + // The CUDN should report a sync error because VID allocation failed + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "NetworkAttachmentDefinitionSyncError", + Message: "failed to allocate EVPN VIDs: failed to allocate VID for MAC-VRF: failed to allocate the id for the resource evpn-exhaust-cudn/macvrf", + }}), "should report VID allocation failure in status") + + // Verify NAD was not created + _, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VID allocation fails") + }) + + It("should allocate VID after pool is freed up", func() { + testNs := testNamespace("evpn-free-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-free-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + + // Exhaust all VIDs except one (starting from 2, since 0,1 already reserved) + for i := 2; i < MaxEVPNVIDs-1; i++ { + err := c.vidAllocator.ReserveID(fmt.Sprintf("exhaust-key-%d", i), i) + Expect(err).NotTo(HaveOccurred()) + } + + // Start controller - it should successfully allocate the last available VID + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-free-test]", + }}), "should successfully create network with last available VID") + + // Verify the VID was allocated + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(MaxEVPNVIDs-1), "should get the last available VID") + }).Should(Succeed()) + }) + + It("should fail to start if VID 0 is already reserved by another resource", func() { + // This tests the defensive check that VID 0 (reserved per IEEE 802.1Q) + // must be reservable during controller initialization. 
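+		// VID 1 is reserved at initialization as well, which is why the first
+		// dynamically allocated VID throughout these tests is 2.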
+ c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest) + + // Reserve VID 0 with a DIFFERENT key (simulating corruption/bug) + Expect(c.vidAllocator.ReserveID("some-other-key", 0)).To(Succeed()) + + // Run should fail because initializeController can't reserve VID 0 + err := c.Run() + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to reserve VID 0")) + }) + + It("should allocate new VID when namespace and NAD are created at runtime", func() { + // Scenario: Allocator has no VID for this key, namespace/NAD created at runtime + // This can happen when: + // - CUDN exists but had no matching namespaces at startup (no NADs to recover) + // - Admin later creates a namespace + // - Controller reconciles and allocates a new VID + // + // 1. Controller starts with CUDN but NO matching namespaces (no NADs created) + // 2. Allocator has NO VID for this key after startup + // 3. Namespace is created at runtime + // 4. Controller reconciles and allocates VID 2 (first available, 0,1 reserved) + vtep := testVTEP("vtep-test") + + // Namespace that doesn't exist at startup + const runtimeNsName = "runtime-ns-test" + + // CUDN with selector matching a namespace that doesn't exist yet + cudn := testEVPNClusterUDN("evpn-runtime-cudn", vtep.Name, runtimeNsName) + + // Start controller - no NADs to recover, allocator empty for this key + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, vtep) + Expect(c.Run()).To(Succeed()) + + // Create namespace at runtime (NAD will be created by controller) + testNs := testNamespace(runtimeNsName) + _, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), testNs, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Controller reconciles and allocates VID 2 (first available, 0,1 reserved) + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be allocated (first available, 0,1 reserved)") + }).Should(Succeed()) + }) + + It("should allocate new VID when existing NAD has VID taken by another CUDN", func() { + // Scenario: Allocator has no VID for this key, but NAD's VID is taken by another CUDN + // This can happen when: + // - CUDN-A had no matching namespaces at startup + // - CUDN-B had a NAD with VID 42 that was recovered + // - Someone manually creates NAD for CUDN-A with VID 42 (collision) + // + // 1. Controller starts with CUDN but NO matching namespaces + // 2. VID 42 is already reserved by a different CUDN + // 3. Namespace and NAD with VID 42 are created at runtime + // 4. Controller reconciles + // 5. 
VID 42 can't be reserved (taken) -> new VID allocated + vtep := testVTEP("vtep-test") + + // Namespace that doesn't exist at startup + const runtimeNsName = "runtime-conflict-test" + + cudn := testEVPNClusterUDN("evpn-runtime-conflict", vtep.Name, runtimeNsName) + + // Start controller - no NADs to recover, allocator empty for this key + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, vtep) + + // VID 42 is already reserved by another CUDN (simulates collision) + Expect(c.vidAllocator.ReserveID("another-cudn/macvrf", 42)).To(Succeed()) + + Expect(c.Run()).To(Succeed()) + + // Create namespace and NAD with VID 42 at runtime (collision with another CUDN) + testNs := testNamespace(runtimeNsName) + runtimeNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 42, 0) + + _, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), testNs, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Create(context.Background(), runtimeNAD, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Controller reconciles - VID 42 is taken, must allocate new VID + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be newly allocated since 42 is taken by another CUDN (first available)") + }).Should(Succeed()) + }) + + It("should revert manual NAD VID change when allocator already has VID for this key", func() { + // This tests the case where: + // - Allocator has VID 2 for this key (from initial NAD creation, first available) + // - Someone manually changes NAD to VID 42 + // - Allocator's VID 2 should win, NAD reverted to 2 + // Note: Whether VID 42 is free or taken doesn't matter - the allocator's + // existing VID takes precedence because ReserveID fails when key already has a VID. + testNs := testNamespace("evpn-vid-manual-change-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-manual-change-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for initial NAD creation (will get VID 2, first available) + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "Initial VID should be 2 (first available)") + }).Should(Succeed()) + + // Now manually update the NAD with VID 42 + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(setNADEVPNVIDs(nad, 42, 0)).To(Succeed()) + _, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Update(context.Background(), nad, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // The NAD update triggers reconciliation. The allocator already has VID 2 + // for this key, so NAD is reverted to 2. 
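+		// In other words, once a key holds a VID the allocator is the source of
+		// truth, and manual edits to the NAD are reverted on the next reconcile.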
+ Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be reverted to allocator's VID") + }).Should(Succeed()) + }) + + It("should report VTEPNotFound when EVPN CUDN references non-existent VTEP", func() { + testNs := testNamespace("evpn-vtep-missing-test") + cudn := testEVPNClusterUDN("evpn-vtep-missing", "default", testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs) + Expect(c.Run()).To(Succeed()) + + // CUDN should report VTEPNotFound status + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "VTEPNotFound", + Message: "Cannot create network: VTEP 'default' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.", + }}), "should report VTEPNotFound in status") + + // NAD should not be created when VTEP is missing + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VTEP is missing") + }) + + It("should create NAD when VTEP exists for EVPN CUDN", func() { + testNs := testNamespace("evpn-vtep-exists-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-vtep-exists", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // CUDN should succeed when VTEP exists + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-exists-test]", + }}), "should succeed when VTEP exists") + + // NAD should be created + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + return err + }).Should(Succeed(), "NAD should be created when VTEP exists") + }) + + It("should automatically reconcile CUDN when VTEP is created after CUDN", func() { + testNs := testNamespace("evpn-vtep-transition-test") + vtepName := "default" + cudn := testEVPNClusterUDN("evpn-vtep-transition", vtepName, testNs.Name) + + // Start controller WITHOUT the VTEP - CUDN references non-existent VTEP + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs) + Expect(c.Run()).To(Succeed()) + + // Step 1: CUDN should initially report VTEPNotFound + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, 
metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "VTEPNotFound", + Message: "Cannot create network: VTEP '" + vtepName + "' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.", + }}), "should initially report VTEPNotFound") + + // NAD should NOT exist yet + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VTEP is missing") + + // Step 2: Create the VTEP dynamically - this should trigger VTEPNotifier + vtep := testVTEP(vtepName) + _, err = cs.VTEPClient.K8sV1().VTEPs().Create(context.Background(), vtep, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Step 3: CUDN should be automatically reconciled and succeed + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-transition-test]", + }}), "should succeed after VTEP is created") + + // NAD should now be created + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + return err + }).Should(Succeed(), "NAD should be created after VTEP is created") + }) + + It("should only re-queue EVPN CUDNs when VTEP changes, not non-EVPN CUDNs", func() { + testNs := testNamespace("vtep-filter-test") + vtep := testVTEP("vtep-filter") + + // Create a non-EVPN CUDN (Layer2 without EVPN transport) + nonEvpnCUDN := testClusterUDN("non-evpn-cudn", testNs.Name) + nonEvpnCUDN.UID = "non-evpn-uid" + + // Create an EVPN CUDN that references the VTEP + evpnCUDN := testEVPNClusterUDN("evpn-cudn", vtep.Name, testNs.Name) + evpnCUDN.UID = "evpn-uid" + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, nonEvpnCUDN, evpnCUDN, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for EVPN NAD to be created + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), evpnCUDN.Name, metav1.GetOptions{}) + return err + }).Should(Succeed()) + + // ReconcileVTEP should iterate over all CUDNs but only match the EVPN one + // This covers the non-EVPN path in cudnReferencesVTEP + err := c.ReconcileVTEP(vtep.Name) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should report VTEPNotFound when VTEP is deleted after CUDN creation", func() { + testNs := testNamespace("evpn-vtep-delete-test") + vtep := testVTEP("vtep-to-delete") + cudn := testEVPNClusterUDN("evpn-vtep-delete", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Step 1: Verify NAD is created successfully when VTEP exists + Eventually(func() []metav1.Condition { + cudn, err := 
cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-delete-test]", + }}), "should initially succeed when VTEP exists") + + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + return err + }).Should(Succeed(), "NAD should be created when VTEP exists") + + // Step 2: Delete the VTEP - this should trigger VTEPNotifier + err := cs.VTEPClient.K8sV1().VTEPs().Delete(context.Background(), vtep.Name, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Step 3: CUDN should be re-reconciled and report VTEPNotFound + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "VTEPNotFound", + Message: "Cannot create network: VTEP '" + vtep.Name + "' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.", + }}), "should report VTEPNotFound after VTEP is deleted") + }) + + It("should fail when EVPN transport is requested but EVPN feature is disabled", func() { + // Disable EVPN feature flag for this test. + // No defer needed - BeforeEach resets config via PrepareTestConfig(). 
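+		// The same gate is enforced in renderCNINetworkConfig; the template
+		// package test below exercises it directly.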
+ config.OVNKubernetesFeature.EnableEVPN = false + + testNs := testNamespace("evpn-disabled-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-disabled-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // CUDN should report error with message about EVPN flag + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "NetworkAttachmentDefinitionSyncError", + Message: "EVPN transport requested but EVPN feature is not enabled", + }}), "should report error when EVPN flag is disabled") + + // NAD should not be created when EVPN is disabled + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when EVPN is disabled") + }) + It("should update NAD annotations and preserve internal OVNK annotations on UDN update", func() { testNamespaces := []string{"red", "blue"} var objs []runtime.Object @@ -1202,6 +2058,32 @@ var _ = Describe("User Defined Network Controller", func() { Expect(err).To(MatchError(expectedErr)) }) + It("when namespace without pods is being deleted, should delete NAD in that namespace", func() { + const cudnName = "test-network" + testNs := testNamespace("blue") + cudn := testClusterUDN(cudnName, testNs.Name) + expectedNAD := testClusterUdnNAD(cudnName, testNs.Name) + c := newTestController(renderNadStub(expectedNAD), cudn, testNs) + Expect(c.Run()).To(Succeed()) + + By("verify NAD is created in namespace") + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return err + }).Should(Succeed()) + + By("mark namespace as terminating") + testNs.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err := cs.KubeClient.CoreV1().Namespaces().Update(context.Background(), testNs, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + By("verify NAD is deleted") + Eventually(func() bool { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }).Should(BeTrue(), "NAD should be deleted when namespace is terminating") + }) + It("when CR is deleted, CR has no finalizer, should succeed", func() { deletedCUDN := testClusterUDN("test", "blue") deletedCUDN.Finalizers = []string{} @@ -1604,7 +2486,208 @@ func failRenderNadStub(err error) RenderNetAttachDefManifest { } func newRenderNadStub(nad *netv1.NetworkAttachmentDefinition, err error) RenderNetAttachDefManifest { - return func(client.Object, string) (*netv1.NetworkAttachmentDefinition, error) { + return func(client.Object, string, ...template.RenderOption) (*netv1.NetworkAttachmentDefinition, error) { return nad, err } } + +func testEVPNClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork { + return &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Labels: 
map[string]string{"k8s.ovn.org/user-defined-network": ""}, + Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"}, + Name: name, + UID: "1", + }, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: corev1.LabelMetadataName, + Operator: metav1.LabelSelectorOpIn, + Values: targetNamespaces, + }, + }}, + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"10.10.10.0/24"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: vtepName, + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + }, + }, + }, + }, + } +} + +// testEVPNClusterUdnNADWithVIDs creates an EVPN NAD with specific MAC-VRF and IP-VRF VIDs. +// Pass 0 for ipVID to create a MAC-VRF only NAD. +func testEVPNClusterUdnNADWithVIDs(name, namespace, vtepName string, macVID, ipVID int) *netv1.NetworkAttachmentDefinition { + nad := testClusterUdnNAD(name, namespace) + if ipVID > 0 { + // Symmetric IRB (both MAC-VRF and IP-VRF) + nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID) + } else { + // MAC-VRF only + nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID) + } + return nad +} + +// testEVPNClusterUdnNADOwnedByCUDN creates an EVPN NAD with specific VIDs and sets up +// the OwnerReferences to indicate ownership by the given CUDN. 
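+// The startup VID recovery path (see the tests above) consumes NADs created
+// this way; allocator keys are per VRF, e.g. "<cudn-name>/macvrf".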
+func testEVPNClusterUdnNADOwnedByCUDN(cudn *udnv1.ClusterUserDefinedNetwork, namespace, vtepName string, macVID, ipVID int) *netv1.NetworkAttachmentDefinition { + nad := testEVPNClusterUdnNADWithVIDs(cudn.Name, namespace, vtepName, macVID, ipVID) + nad.OwnerReferences = []metav1.OwnerReference{ + { + APIVersion: "k8s.ovn.org/v1", + Kind: "ClusterUserDefinedNetwork", + Name: cudn.Name, + UID: cudn.UID, + Controller: ptr.To(true), + BlockOwnerDeletion: ptr.To(true), + }, + } + return nad +} + +func testSymmetricIRBClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork { + return &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"k8s.ovn.org/user-defined-network": ""}, + Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"}, + Name: name, + UID: "1", + }, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: corev1.LabelMetadataName, + Operator: metav1.LabelSelectorOpIn, + Values: targetNamespaces, + }, + }}, + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"10.10.10.0/24"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: vtepName, + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + }, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + }, + }, + }, + }, + } +} + +func testEVPNIPVRFClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork { + return &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"k8s.ovn.org/user-defined-network": ""}, + Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"}, + Name: name, + UID: "1", + }, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: corev1.LabelMetadataName, + Operator: metav1.LabelSelectorOpIn, + Values: targetNamespaces, + }, + }}, + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRoleSecondary, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: vtepName, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + }, + }, + }, + }, + } +} + +func testVTEP(name string) *vtepv1.VTEP { + return &vtepv1.VTEP{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + UID: types.UID("vtep-" + name), + }, + Spec: vtepv1.VTEPSpec{ + CIDRs: vtepv1.DualStackCIDRs{"100.64.0.0/24"}, + Mode: vtepv1.VTEPModeManaged, + }, + } +} + +// evpnVIDsFromNAD extracts MAC-VRF and IP-VRF VIDs from a NAD config. +// Returns (macVID, ipVID) where 0 indicates the VRF is not present or has no VID. +func evpnVIDsFromNAD(nad *netv1.NetworkAttachmentDefinition) (macVID, ipVID int) { + if nad == nil { + return 0, 0 + } + var netConf ovncnitypes.NetConf + if err := json.Unmarshal([]byte(nad.Spec.Config), &netConf); err != nil { + return 0, 0 + } + if netConf.EVPN == nil { + return 0, 0 + } + if netConf.EVPN.MACVRF != nil { + macVID = netConf.EVPN.MACVRF.VID + } + if netConf.EVPN.IPVRF != nil { + ipVID = netConf.EVPN.IPVRF.VID + } + return macVID, ipVID +} + +// setNADEVPNVIDs modifies the MAC-VRF and/or IP-VRF VIDs in a NAD config. +// Pass 0 to leave a VID unchanged. This is used in tests to set specific VIDs +// without rewriting the entire config. 
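+// For example, setNADEVPNVIDs(nad, 42, 0) sets the MAC-VRF VID to 42 and
+// leaves any IP-VRF VID untouched.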
+func setNADEVPNVIDs(nad *netv1.NetworkAttachmentDefinition, macVID, ipVID int) error { + var netConf ovncnitypes.NetConf + if err := json.Unmarshal([]byte(nad.Spec.Config), &netConf); err != nil { + return err + } + if netConf.EVPN == nil { + return fmt.Errorf("NAD has no EVPN config") + } + if macVID > 0 { + if netConf.EVPN.MACVRF == nil { + return fmt.Errorf("NAD has no EVPN MAC-VRF config") + } + netConf.EVPN.MACVRF.VID = macVID + } + if ipVID > 0 { + if netConf.EVPN.IPVRF == nil { + return fmt.Errorf("NAD has no EVPN IP-VRF config") + } + netConf.EVPN.IPVRF.VID = ipVID + } + configBytes, err := json.Marshal(netConf) + if err != nil { + return err + } + nad.Spec.Config = string(configBytes) + return nil +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go index 90ff81befc..d6dbf634f2 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go @@ -46,10 +46,11 @@ func NewNamespaceNotifier(nsInformer corev1informer.NamespaceInformer, subscribe func (c *NamespaceNotifier) needUpdate(old, new *corev1.Namespace) bool { nsCreated := old == nil && new != nil nsDeleted := old != nil && new == nil + nsDeleting := new != nil && !new.DeletionTimestamp.IsZero() nsLabelsChanged := old != nil && new != nil && !reflect.DeepEqual(old.Labels, new.Labels) - return nsCreated || nsDeleted || nsLabelsChanged + return nsCreated || nsDeleted || nsDeleting || nsLabelsChanged } // reconcile notify subscribers with the request namespace key following namespace events. diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go index afe0d93c03..50ed844a34 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go @@ -15,6 +15,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" udnv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -40,6 +41,7 @@ var _ = Describe("NamespaceNotifier", func() { KubeClient: kubeClient, NetworkAttchDefClient: netv1fake.NewSimpleClientset(), UserDefinedNetworkClient: udnv1fake.NewSimpleClientset(), + VTEPClient: vtepv1fake.NewSimpleClientset(), } var err error wf, err = factory.NewClusterManagerWatchFactory(fakeClient) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go new file mode 100644 index 0000000000..75eb8d7fcb --- /dev/null +++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go @@ -0,0 +1,70 @@ +package notifier + +import ( + "errors" + + "k8s.io/client-go/util/workqueue" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" + vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" +) 
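+
+// Typical wiring (sketch; assumes the subscribing controller implements
+// VTEPReconciler, as the tests below do):
+//
+//	notifier := NewVTEPNotifier(watchFactory.VTEPInformer(), subscriber)
+//	if err := controller.Start(notifier.Controller); err != nil {
+//		// handle startup error
+//	}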
+
+// VTEPReconciler is the interface for controllers that need to react to VTEP events.
+type VTEPReconciler interface {
+	ReconcileVTEP(key string) error
+}
+
+// VTEPNotifier watches VTEP objects and notifies subscribers upon change.
+// It enqueues the reconciled object keys in the subscribing controllers' workqueues.
+type VTEPNotifier struct {
+	Controller controller.Controller
+
+	subscribers []VTEPReconciler
+}
+
+// NewVTEPNotifier creates a new VTEPNotifier that watches VTEP CRs and notifies subscribers.
+func NewVTEPNotifier(vtepInformer vtepinformer.VTEPInformer, subscribers ...VTEPReconciler) *VTEPNotifier {
+	c := &VTEPNotifier{
+		subscribers: subscribers,
+	}
+
+	vtepLister := vtepInformer.Lister()
+	cfg := &controller.ControllerConfig[vtepv1.VTEP]{
+		RateLimiter:    workqueue.DefaultTypedControllerRateLimiter[string](),
+		Reconcile:      c.reconcile,
+		ObjNeedsUpdate: c.needUpdate,
+		Threadiness:    1,
+		Informer:       vtepInformer.Informer(),
+		Lister:         vtepLister.List,
+	}
+	c.Controller = controller.NewController("udn-vtep-controller", cfg)
+
+	return c
+}
+
+// needUpdate returns true when the VTEP has been created or deleted.
+// We notify on create/delete so that CUDNs referencing this VTEP can be re-queued.
+// IMPORTANT: Before adding update notifications, verify that all subscribers
+// can handle increased event frequency.
+func (c *VTEPNotifier) needUpdate(old, new *vtepv1.VTEP) bool {
+	vtepCreated := old == nil && new != nil
+	vtepDeleted := old != nil && new == nil
+	return vtepCreated || vtepDeleted
+}
+
+// reconcile notifies subscribers with the VTEP key following VTEP events.
+func (c *VTEPNotifier) reconcile(key string) error {
+	var errs []error
+	for _, subscriber := range c.subscribers {
+		if subscriber != nil {
+			// enqueue the reconciled VTEP key in the subscriber's workqueue to
+			// enable the subscriber to act on VTEP changes
+			if err := subscriber.ReconcileVTEP(key); err != nil {
+				errs = append(errs, err)
+			}
+		}
+	}
+
+	return errors.Join(errs...)
+}
diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep_test.go
new file mode 100644
index 0000000000..111106f3e5
--- /dev/null
+++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep_test.go
@@ -0,0 +1,198 @@
+package notifier
+
+import (
+	"context"
+	"maps"
+	"strconv"
+	"sync"
+
+	netv1fake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake"
+	frrfake "github.com/metallb/frr-k8s/pkg/client/clientset/versioned/fake"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller"
+	rafake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/fake"
+	udnv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake"
+	vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1"
+	vtepv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+
+	. "github.com/onsi/ginkgo/v2"
+	.
"github.com/onsi/gomega" +) + +var _ = Describe("VTEPNotifier", func() { + var ( + vtepClient *vtepv1fake.Clientset + wf *factory.WatchFactory + testVTEPNotifier *VTEPNotifier + ) + + BeforeEach(func() { + vtepClient = vtepv1fake.NewSimpleClientset() + + // enable features to make watch-factory start the VTEP informer + Expect(config.PrepareTestConfig()).To(Succeed()) + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableRouteAdvertisements = true + config.OVNKubernetesFeature.EnableEVPN = true + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: fake.NewSimpleClientset(), + NetworkAttchDefClient: netv1fake.NewSimpleClientset(), + UserDefinedNetworkClient: udnv1fake.NewSimpleClientset(), + RouteAdvertisementsClient: rafake.NewSimpleClientset(), + FRRClient: frrfake.NewSimpleClientset(), + VTEPClient: vtepClient, + } + var err error + wf, err = factory.NewClusterManagerWatchFactory(fakeClient) + Expect(err).NotTo(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + }) + + AfterEach(func() { + wf.Shutdown() + }) + + var s *testVTEPSubscriber + + BeforeEach(func() { + s = &testVTEPSubscriber{reconciledKeys: map[string]int64{}} + testVTEPNotifier = NewVTEPNotifier(wf.VTEPInformer(), s) + Expect(controller.Start(testVTEPNotifier.Controller)).Should(Succeed()) + + // create test VTEPs + for i := 0; i < 3; i++ { + vtepName := "test-vtep-" + strconv.Itoa(i) + _, err := vtepClient.K8sV1().VTEPs().Create(context.Background(), testVTEP(vtepName), metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + }) + + AfterEach(func() { + if testVTEPNotifier != nil { + controller.Stop(testVTEPNotifier.Controller) + } + }) + + It("should notify VTEP create events", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + }) + + It("should notify VTEP delete events", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + + Expect(vtepClient.K8sV1().VTEPs().Delete(context.Background(), "test-vtep-2", metav1.DeleteOptions{})).To(Succeed()) + Expect(vtepClient.K8sV1().VTEPs().Delete(context.Background(), "test-vtep-0", metav1.DeleteOptions{})).To(Succeed()) + + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 2, + "test-vtep-1": 1, + "test-vtep-2": 2, + }), "should record additional two events, following VTEP deletion") + }) + + It("should NOT notify VTEP update events (spec/status changes)", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + + // Update VTEP spec (change CIDRs) + vtep, err := vtepClient.K8sV1().VTEPs().Get(context.Background(), "test-vtep-1", metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + vtep.Spec.CIDRs = vtepv1.DualStackCIDRs{"192.168.0.0/24"} + _, err = vtepClient.K8sV1().VTEPs().Update(context.Background(), vtep, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Updates should NOT trigger notification (needUpdate returns false for updates) + Consistently(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + 
"test-vtep-2": 1, + }), "should NOT record additional events following VTEP update") + }) + + It("should notify multiple subscribers", func() { + // Stop the single-subscriber notifier + controller.Stop(testVTEPNotifier.Controller) + + // Create a second subscriber + s2 := &testVTEPSubscriber{reconciledKeys: map[string]int64{}} + + // Create a new notifier with multiple subscribers + testVTEPNotifier = NewVTEPNotifier(wf.VTEPInformer(), s, s2) + Expect(controller.Start(testVTEPNotifier.Controller)).Should(Succeed()) + + // Create a new VTEP + _, err := vtepClient.K8sV1().VTEPs().Create(context.Background(), testVTEP("test-vtep-new"), metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Both subscribers should be notified exactly once + Eventually(func(g Gomega) { + keys1 := s.GetReconciledKeys() + keys2 := s2.GetReconciledKeys() + g.Expect(keys1["test-vtep-new"]).To(BeEquivalentTo(1), "subscriber 1 should be notified exactly once") + g.Expect(keys2["test-vtep-new"]).To(BeEquivalentTo(1), "subscriber 2 should be notified exactly once") + }).Should(Succeed()) + }) +}) + +func testVTEP(name string) *vtepv1.VTEP { + return &vtepv1.VTEP{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: vtepv1.VTEPSpec{ + CIDRs: vtepv1.DualStackCIDRs{"10.10.10.0/24"}, + Mode: vtepv1.VTEPModeManaged, + }, + } +} + +type testVTEPSubscriber struct { + err error + reconciledKeys map[string]int64 + lock sync.RWMutex +} + +func (s *testVTEPSubscriber) ReconcileVTEP(key string) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.reconciledKeys[key]++ + return s.err +} + +func (s *testVTEPSubscriber) GetReconciledKeys() map[string]int64 { + s.lock.RLock() + defer s.lock.RUnlock() + + cp := map[string]int64{} + maps.Copy(cp, s.reconciledKeys) + return cp +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index e451ed3923..62850d4a23 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -33,15 +33,17 @@ type SpecGetter interface { GetLayer3() *userdefinednetworkv1.Layer3Config GetLayer2() *userdefinednetworkv1.Layer2Config GetLocalnet() *userdefinednetworkv1.LocalnetConfig + GetTransport() userdefinednetworkv1.TransportOption + GetEVPN() *userdefinednetworkv1.EVPNConfig } -func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*netv1.NetworkAttachmentDefinition, error) { +func RenderNetAttachDefManifest(obj client.Object, targetNamespace string, opts ...RenderOption) (*netv1.NetworkAttachmentDefinition, error) { if obj == nil { return nil, nil } if targetNamespace == "" { - return nil, fmt.Errorf("namspace should not be empty") + return nil, fmt.Errorf("namespace should not be empty") } var ownerRef metav1.OwnerReference @@ -62,7 +64,7 @@ func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*net nadName := util.GetNADName(targetNamespace, obj.GetName()) - nadSpec, err := RenderNADSpec(networkName, nadName, spec) + nadSpec, err := renderNADSpec(networkName, nadName, spec, applyOptions(opts)) if err != nil { return nil, err } @@ -79,12 +81,12 @@ func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*net }, nil } -func RenderNADSpec(networkName, nadName string, spec SpecGetter) (*netv1.NetworkAttachmentDefinitionSpec, error) { +func 
renderNADSpec(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (*netv1.NetworkAttachmentDefinitionSpec, error) { if err := validateTopology(spec); err != nil { return nil, fmt.Errorf("invalid topology specified: %w", err) } - cniNetConf, err := renderCNINetworkConfig(networkName, nadName, spec) + cniNetConf, err := renderCNINetworkConfig(networkName, nadName, spec, opts) if err != nil { return nil, fmt.Errorf("failed to render CNI network config: %w", err) } @@ -98,7 +100,7 @@ func RenderNADSpec(networkName, nadName string, spec SpecGetter) (*netv1.Network }, nil } -// renderNADLabels copies labels from UDN to help RenderNADSpec +// renderNADLabels copies labels from UDN to help renderNADSpec // function add those labels to corresponding NAD func renderNADLabels(obj client.Object) map[string]string { labels := make(map[string]string) @@ -134,15 +136,16 @@ func validateTopology(spec SpecGetter) error { return nil } -func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[string]interface{}, error) { +func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (map[string]interface{}, error) { netConfSpec := &ovncnitypes.NetConf{ NetConf: cnitypes.NetConf{ CNIVersion: cniVersion, Type: OvnK8sCNIOverlay, Name: networkName, }, - NADName: nadName, - Topology: strings.ToLower(string(spec.GetTopology())), + NADName: nadName, + Topology: strings.ToLower(string(spec.GetTopology())), + Transport: transportFromCRD(string(spec.GetTransport())), } switch spec.GetTopology() { @@ -194,6 +197,14 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s netConfSpec.VLANID = int(cfg.VLAN.Access.ID) } } + + if spec.GetTransport() == userdefinednetworkv1.TransportOptionEVPN { + if !util.IsEVPNEnabled() { + return nil, fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + netConfSpec.EVPN = renderEVPNConfig(spec, opts) + } + if netConfSpec.AllowPersistentIPs && !config.OVNKubernetesFeature.EnablePersistentIPs { return nil, fmt.Errorf("allowPersistentIPs is set but persistentIPs is Disabled") } @@ -256,9 +267,33 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s cniNetConf["defaultGatewayIPs"] = netConfSpec.DefaultGatewayIPs } } + + if netConfSpec.Transport != "" { + cniNetConf["transport"] = netConfSpec.Transport + } + if netConfSpec.EVPN != nil { + cniNetConf["evpn"] = netConfSpec.EVPN + } + return cniNetConf, nil } +// transportFromCRD converts CRD PascalCase format to canonical format. +// CRD format uses PascalCase: "Geneve", "NoOverlay", "EVPN" +// Returns canonical lowercase format: "geneve", "no-overlay", "evpn" +func transportFromCRD(crdTransport string) string { + switch crdTransport { + case "Geneve": + return types.NetworkTransportGeneve + case "NoOverlay": + return types.NetworkTransportNoOverlay + case "EVPN": + return types.NetworkTransportEVPN + default: + return crdTransport // Return as-is for validation to catch + } +} + func localnetMTU(desiredMTU int32) int { // The MTU for localnet topology should be as the default MTU (1500) because the underlay // is not part of the SDN and compensating for the SDN overhead (100) is not required. @@ -332,6 +367,36 @@ func ipString(ips userdefinednetworkv1.DualStackIPs) string { return strings.Join(ipStrings, ",") } +// renderEVPNConfig converts the EVPN configuration from the spec into the CNI EVPNConfig format. +// Note: evpnCfg is guaranteed to be non-nil by CEL validation on the CRD. 
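+// For example, a MAC-VRF with VNI 100 rendered with WithEVPNVIDs(12, 0)
+// produces `"macVRF": {"vni": 100, "vid": 12}` in the resulting NAD config.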
+func renderEVPNConfig(spec SpecGetter, opts *RenderOptions) *ovncnitypes.EVPNConfig { + evpnCfg := spec.GetEVPN() + evpnConfig := &ovncnitypes.EVPNConfig{ + VTEP: evpnCfg.VTEP, + } + + if evpnCfg.MACVRF != nil { + evpnConfig.MACVRF = &ovncnitypes.VRFConfig{ + VNI: evpnCfg.MACVRF.VNI, + RouteTarget: string(evpnCfg.MACVRF.RouteTarget), + } + if opts != nil && opts.EVPNVIDs != nil && opts.EVPNVIDs.MACVRFVID > 0 { + evpnConfig.MACVRF.VID = opts.EVPNVIDs.MACVRFVID + } + } + if evpnCfg.IPVRF != nil { + evpnConfig.IPVRF = &ovncnitypes.VRFConfig{ + VNI: evpnCfg.IPVRF.VNI, + RouteTarget: string(evpnCfg.IPVRF.RouteTarget), + } + if opts != nil && opts.EVPNVIDs != nil && opts.EVPNVIDs.IPVRFVID > 0 { + evpnConfig.IPVRF.VID = opts.EVPNVIDs.IPVRFVID + } + } + + return evpnConfig +} + func GetSpec(obj client.Object) SpecGetter { switch o := obj.(type) { case *userdefinednetworkv1.UserDefinedNetwork: diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index e44cee4366..5881617c6b 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -1,6 +1,7 @@ package template import ( + "encoding/json" "strings" netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" @@ -21,15 +22,20 @@ import ( var _ = Describe("NetAttachDefTemplate", func() { - // before each test, set the IPv4Mode and IPv6Mode to true BeforeEach(func() { + // Restore global default values before each testcase + Expect(config.PrepareTestConfig()).To(Succeed()) config.IPv4Mode = true config.IPv6Mode = true + // Enable EVPN for tests that use EVPN transport + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableRouteAdvertisements = true + config.OVNKubernetesFeature.EnableEVPN = true }) DescribeTable("should fail to render NAD spec given", func(spec *udnv1.UserDefinedNetworkSpec, expectedError string) { - _, err := RenderNADSpec("foo", "bar", spec) + _, err := renderNADSpec("foo", "bar", spec, nil) Expect(err).To(MatchError(ContainSubstring(expectedError))) }, Entry("invalid layer2 subnets", @@ -631,8 +637,342 @@ var _ = Describe("NetAttachDefTemplate", func() { "allowPersistentIPs": true }`, ), + Entry("primary network, layer2 with EVPN transport and MAC-VRF", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer2", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16", + "subnets": "192.168.100.0/24", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "macVRF": { + "vni": 100, + "routeTarget": "65000:100" + } + } + }`, + ), + Entry("primary network, layer3 with EVPN transport and IP-VRF", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: 
[]udnv1.Layer3Subnet{ + {CIDR: "192.168.100.0/16"}, + }, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "65000:200", + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer3", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "subnets": "192.168.100.0/16", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "ipVRF": { + "vni": 200, + "routeTarget": "65000:200" + } + } + }`, + ), + Entry("primary network, layer2 with EVPN transport, MAC-VRF and IP-VRF", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "100000:100", // 4-byte ASN format + }, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "192.168.1.1:200", // IPv4 format + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer2", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16", + "subnets": "192.168.100.0/24", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "macVRF": { + "vni": 100, + "routeTarget": "100000:100" + }, + "ipVRF": { + "vni": 200, + "routeTarget": "192.168.1.1:200" + } + } + }`, + ), + Entry("primary network, layer2 with EVPN transport, MAC-VRF with VNI only (no RouteTarget)", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + // RouteTarget intentionally omitted + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer2", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16", + "subnets": "192.168.100.0/24", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "macVRF": { + "vni": 100 + } + } + }`, + ), ) + Context("EVPN VID injection", func() { + It("should inject VIDs into EVPN config when provided via WithEVPNVIDs", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "65000:200", + }, + }, + }, + }, + } + + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(12, 13)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = 
json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + Expect(netConf.EVPN).NotTo(BeNil(), "evpnConfig should be present") + Expect(netConf.EVPN.MACVRF).NotTo(BeNil(), "macVRF should be present") + Expect(netConf.EVPN.MACVRF.VID).To(Equal(12), "macVRF VID should be 12") + Expect(netConf.EVPN.IPVRF).NotTo(BeNil(), "ipVRF should be present") + Expect(netConf.EVPN.IPVRF.VID).To(Equal(13), "ipVRF VID should be 13") + }) + + It("should omit VID when zero (VID=0 not injected)", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-no-vid", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + }, + }, + }, + } + + // Pass VID=0 for both (should be omitted from JSON, unmarshals as zero value) + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(0, 0)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + Expect(netConf.EVPN).NotTo(BeNil(), "evpnConfig should be present") + Expect(netConf.EVPN.MACVRF).NotTo(BeNil(), "macVRF should be present") + Expect(netConf.EVPN.MACVRF.VID).To(Equal(0), "VID should be zero when not injected") + + // Also verify the raw JSON doesn't contain "vid" field (omitempty) + Expect(nad.Spec.Config).NotTo(ContainSubstring(`"vid"`), "vid field should be omitted from JSON when zero") + }) + + It("should omit empty RouteTarget in EVPN config", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-no-rt", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + // RouteTarget intentionally omitted (empty) + }, + }, + }, + }, + } + + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(5, 0)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + // RouteTarget should be empty (omitted in JSON, unmarshals as empty string) + Expect(netConf.EVPN.MACVRF.RouteTarget).To(BeEmpty(), "empty routeTarget should unmarshal as empty string") + + // Also verify the raw JSON doesn't contain "routeTarget" field + Expect(nad.Spec.Config).NotTo(ContainSubstring(`"routeTarget"`), "routeTarget should be omitted from JSON when empty") + + // VID should be present + Expect(netConf.EVPN.MACVRF.VID).To(Equal(5), "macVRF VID should be 5") + }) + + It("should handle nil RenderOption without panic", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-nil-option", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: 
udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + }, + }, + } + + // Pass nil option - should not panic + var nilOpt RenderOption + Expect(func() { + _, _ = RenderNetAttachDefManifest(cudn, "test-ns", nilOpt, WithEVPNVIDs(1, 2)) + }).NotTo(Panic()) + }) + + It("should fail when EVPN transport is requested but EVPN feature is disabled", func() { + // Disable EVPN feature flag for this test. + // No defer needed - BeforeEach resets config via PrepareTestConfig(). + config.OVNKubernetesFeature.EnableEVPN = false + + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-disabled", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + }, + }, + }, + } + + _, err := RenderNetAttachDefManifest(cudn, "test-ns") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("EVPN transport requested but EVPN feature is not enabled")) + }) + }) + It("should correctly assign transit Subnets", func() { // check no overlap, use default values netConf := &ovncnitypes.NetConf{ diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go new file mode 100644 index 0000000000..7b93ef34fd --- /dev/null +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go @@ -0,0 +1,41 @@ +package template + +// RenderOption is a functional option for configuring NAD rendering. +type RenderOption func(*RenderOptions) + +// RenderOptions contains optional configuration for NAD rendering. +type RenderOptions struct { + EVPNVIDs *EVPNVIDs +} + +// EVPNVIDs contains pre-allocated VLAN IDs for EVPN MAC-VRF and IP-VRF. +type EVPNVIDs struct { + // MACVRFVID is the VLAN ID for the MAC-VRF (Layer 2 EVPN). + // A value of 0 means no VID is allocated for MAC-VRF. + MACVRFVID int + // IPVRFVID is the VLAN ID for the IP-VRF (Layer 3 EVPN). + // A value of 0 means no VID is allocated for IP-VRF. + IPVRFVID int +} + +// WithEVPNVIDs returns a RenderOption that sets the EVPN VIDs for rendering. +func WithEVPNVIDs(macVRFVID, ipVRFVID int) RenderOption { + return func(opts *RenderOptions) { + opts.EVPNVIDs = &EVPNVIDs{ + MACVRFVID: macVRFVID, + IPVRFVID: ipVRFVID, + } + } +} + +// applyOptions applies the given functional options and returns the resulting RenderOptions. +// Nil options in the slice are safely skipped to prevent panics. +func applyOptions(opts []RenderOption) *RenderOptions { + options := &RenderOptions{} + for _, opt := range opts { + if opt != nil { + opt(options) + } + } + return options +} diff --git a/go-controller/pkg/cni/types/types.go b/go-controller/pkg/cni/types/types.go index 0963f76507..8f8007e1fb 100644 --- a/go-controller/pkg/cni/types/types.go +++ b/go-controller/pkg/cni/types/types.go @@ -81,7 +81,7 @@ type NetConf struct { PhysicalNetworkName string `json:"physicalNetworkName,omitempty"` // Transport describes the transport protocol for east-west traffic. - // Valid values are "nooverlay", "geneve", and "evpn". + // Valid values are "no-overlay", "geneve", and "evpn". // Defaults to "geneve". 
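+	// For illustration, a rendered NAD config that selects EVPN transport
+	// carries values along these lines (a sketch drawn from the template
+	// tests above, not a complete schema):
+	//   "transport": "evpn",
+	//   "evpn": {"vtep": "my-vtep", "ipVRF": {"vni": 200, "routeTarget": "65000:200"}}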
Transport string `json:"transport,omitempty"` @@ -127,6 +127,9 @@ type VRFConfig struct { VNI int32 `json:"vni"` // RouteTarget is the BGP route target for this VRF. RouteTarget string `json:"routeTarget,omitempty"` + // VID is the VLAN ID used for local traffic segmentation on each node. + // Allocated cluster-wide by the UDN controller, one per VRF. + VID int `json:"vid,omitempty"` } // NetworkSelectionElement represents one element of the JSON format diff --git a/go-controller/pkg/cni/udn/primary_network.go b/go-controller/pkg/cni/udn/primary_network.go index c751a6cbda..8ac6fecfc6 100644 --- a/go-controller/pkg/cni/udn/primary_network.go +++ b/go-controller/pkg/cni/udn/primary_network.go @@ -152,6 +152,10 @@ func (p *UserDefinedPrimaryNetwork) ensureActiveNetwork(namespace string) error if err != nil { return err } + // CNI should always have an active network for a pod on our node + if activeNetwork == nil { + return fmt.Errorf("no active network found for namespace %s", namespace) + } if activeNetwork.IsDefault() { return fmt.Errorf("missing primary user defined network NAD for namespace '%s'", namespace) } diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index c67738e9ec..2f6cf72f24 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -1,6 +1,7 @@ package config import ( + "encoding/base64" "flag" "fmt" "net" @@ -101,6 +102,7 @@ var ( RawClusterSubnets: "10.128.0.0/14/23", Zone: types.OvnDefaultZone, RawUDNAllowedDefaultServices: "default/kubernetes,kube-system/kube-dns", + Transport: types.NetworkTransportGeneve, } // Logging holds logging-related parsed config file parameters and command-line overrides @@ -242,6 +244,14 @@ var ( V6TransitSubnet: "fd97::/64", } + // NoOverlay holds no-overlay mode configuration + NoOverlay = NoOverlayConfig{} + + // ManagedBGP holds managed BGP configuration + ManagedBGP = ManagedBGPConfig{ + ASNumber: 64512, // Default AS number + } + // Layer2UsesTransitRouter indicated whether the layer2 primary networks will use transit router. // It is a per-node setting and is also reflected in the node annotations. Layer2UsesTransitRouter bool @@ -253,6 +263,22 @@ const ( kubeServiceAccountFileCACert string = "ca.crt" ) +// No-overlay mode configuration option constants +const ( + // NoOverlayRoutingManaged indicates OVN-Kubernetes manages the routing + NoOverlayRoutingManaged string = "managed" + // NoOverlayRoutingUnmanaged indicates users manage the routing themselves + NoOverlayRoutingUnmanaged string = "unmanaged" + + // ManagedBGPTopologyFullMesh represents a full-mesh BGP topology + ManagedBGPTopologyFullMesh string = "full-mesh" + + // NoOverlaySNATEnabled enables SNAT for outbound traffic + NoOverlaySNATEnabled string = "enabled" + // NoOverlaySNATDisabled disables SNAT for outbound traffic + NoOverlaySNATDisabled string = "disabled" +) + // DefaultConfig holds parsed config file parameters and command-line overrides type DefaultConfig struct { // MTU value used for the overlay networks. @@ -336,6 +362,11 @@ type DefaultConfig struct { // UDNAllowedDefaultServices holds a list of namespaced names of // default cluster network services accessible from primary user-defined networks UDNAllowedDefaultServices []string + + // Transport specifies the transport technology used for the default network. + // Accepts: "geneve" or "no-overlay". + // Defaults to "geneve". 
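+	// A minimal config-file stanza selecting no-overlay might look like this
+	// (an illustrative sketch; section and key names follow the gcfg tags
+	// declared in this file):
+	//   [default]
+	//   transport=no-overlay
+	//   [no-overlay]
+	//   outbound-snat=enabled
+	//   routing=managed
+	//   [bgp-managed]
+	//   topology=full-mesh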
+ Transport string `gcfg:"transport"` } // LoggingConfig holds logging-related parsed config file parameters and command-line overrides @@ -407,6 +438,7 @@ type KubernetesConfig struct { CertDuration time.Duration `gcfg:"cert-duration"` Kubeconfig string `gcfg:"kubeconfig"` CACert string `gcfg:"cacert"` + CACertData string `gcfg:"cacert-data"` CAData []byte APIServer string `gcfg:"apiserver"` Token string `gcfg:"token"` @@ -475,6 +507,7 @@ type OVNKubernetesFeatureConfig struct { EnableServiceTemplateSupport bool `gcfg:"enable-svc-template-support"` EnableObservability bool `gcfg:"enable-observability"` EnableNetworkQoS bool `gcfg:"enable-network-qos"` + AllowICMPNetworkPolicy bool `gcfg:"allow-icmp-network-policy"` // This feature requires a kernel fix https://github.com/torvalds/linux/commit/7f3287db654395f9c5ddd246325ff7889f550286 // to work on a kind cluster. Flag allows to disable it for current CI, will be turned on when github runners have this fix. AdvertisedUDNIsolationMode string `gcfg:"advertised-udn-isolation-mode"` @@ -620,6 +653,32 @@ type ClusterManagerConfig struct { V6TransitSubnet string `gcfg:"v6-transit-subnet"` } +// NoOverlayConfig holds configuration for no-overlay mode +type NoOverlayConfig struct { + // OutboundSNAT configures SNAT behavior for outbound traffic from pods on the default network. + // Supported values: "enabled" or "disabled". + // Required when transport=no-overlay. + OutboundSNAT string `gcfg:"outbound-snat"` + // Routing configures whether the pod network routing configuration is managed by + // OVN-Kubernetes or users. Supported values: "managed" or "unmanaged". + // Required when transport=no-overlay. + Routing string `gcfg:"routing"` +} + +// ManagedBGPConfig holds configuration for managed BGP +type ManagedBGPConfig struct { + // ASNumber specifies the AS number to be used by the BGP speakers on each node for its + // default VRF when no-overlay networks are configured with managed routing. + // It is shared by both the cluster default network and CUDNs. + // Supports both 16-bit (1-65535) and 32-bit (1-4294967295) AS numbers. + // Optional. Defaults to 64512 if not specified. + ASNumber uint32 `gcfg:"as-number"` + // Topology configures the BGP peering topology when routing is managed. + // Supported values: "full-mesh". + // Required when transport=no-overlay and routing=managed. 
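+	// For example, with three nodes a, b and c, full-mesh implies the BGP
+	// sessions a<->b, a<->c and b<->c, i.e. n*(n-1)/2 sessions for n nodes
+	// (an illustrative reading of the only supported value).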
+	Topology string `gcfg:"topology"`
+}
+
 // OvnDBScheme describes the OVN database connection transport method
 type OvnDBScheme string
@@ -651,6 +710,8 @@ type config struct {
 	OvnKubeNode    OvnKubeNodeConfig
 	ClusterManager ClusterManagerConfig
 	OvsPaths       OvsPathConfig
+	NoOverlay      NoOverlayConfig  `gcfg:"no-overlay"`
+	ManagedBGP     ManagedBGPConfig `gcfg:"bgp-managed"`
 }
 
 var (
@@ -671,6 +732,8 @@ var (
 	savedOvnKubeNode    OvnKubeNodeConfig
 	savedClusterManager ClusterManagerConfig
 	savedOvsPaths       OvsPathConfig
+	savedNoOverlay      NoOverlayConfig
+	savedManagedBGP     ManagedBGPConfig
 	// legacy service-cluster-ip-range CLI option
 	serviceClusterIPRange string
@@ -701,6 +764,8 @@ func init() {
 	savedOvnKubeNode = OvnKubeNode
 	savedClusterManager = ClusterManager
 	savedOvsPaths = OvsPaths
+	savedNoOverlay = NoOverlay
+	savedManagedBGP = ManagedBGP
 	cli.VersionPrinter = func(_ *cli.Context) {
 		fmt.Printf("Version: %s\n", Version)
 		fmt.Printf("Git commit: %s\n", Commit)
@@ -732,6 +797,8 @@ func PrepareTestConfig() error {
 	OvnKubeNode = savedOvnKubeNode
 	ClusterManager = savedClusterManager
 	OvsPaths = savedOvsPaths
+	NoOverlay = savedNoOverlay
+	ManagedBGP = savedManagedBGP
 	Kubernetes.DisableRequestedChassis = false
 	EnableMulticast = false
 	UnprivilegedMode = false
@@ -754,6 +821,7 @@ func PrepareTestConfig() error {
 	// Don't pick up defaults from the environment
 	os.Unsetenv("KUBECONFIG")
 	os.Unsetenv("K8S_CACERT")
+	os.Unsetenv("K8S_CACERT_DATA")
 	os.Unsetenv("K8S_APISERVER")
 	os.Unsetenv("K8S_TOKEN")
 	os.Unsetenv("K8S_TOKEN_FILE")
@@ -870,7 +938,7 @@ var CommonFlags = []cli.Flag{
 	},
 	&cli.StringFlag{
 		Name:        "encap-type",
-		Usage:       "The encapsulation protocol to use to transmit packets between hypervisors",
+		Usage:       "The encapsulation protocol used by OVN to transmit packets between hypervisors in overlay mode (geneve, vxlan, gre)",
 		Destination: &cliConfig.Default.EncapType,
 		Value:       Default.EncapType,
 	},
@@ -965,6 +1033,12 @@ var CommonFlags = []cli.Flag{
 			"it defaults to 24 if unspecified.",
 		Destination: &cliConfig.Default.RawClusterSubnets,
 	},
+	&cli.StringFlag{
+		Name:        "transport",
+		Value:       Default.Transport,
+		Usage:       "Transport technology used for the default network; defaults to geneve if unspecified (geneve, no-overlay)",
+		Destination: &cliConfig.Default.Transport,
+	},
 	&cli.BoolFlag{
 		Name:  "unprivileged-mode",
 		Usage: "Run ovnkube-node container in unprivileged mode. Valid only with --init-node option.",
@@ -1194,6 +1268,12 @@ var OVNK8sFeatureFlags = []cli.Flag{
 		Destination: &cliConfig.OVNKubernetesFeature.EnableStatelessNetPol,
 		Value:       OVNKubernetesFeature.EnableStatelessNetPol,
 	},
+	&cli.BoolFlag{
+		Name:        "allow-icmp-network-policy",
+		Usage:       "Allow ICMP/ICMPv6 traffic to bypass NetworkPolicy default-deny rules.",
+		Destination: &cliConfig.OVNKubernetesFeature.AllowICMPNetworkPolicy,
+		Value:       OVNKubernetesFeature.AllowICMPNetworkPolicy,
+	},
 	&cli.BoolFlag{
 		Name:  "enable-interconnect",
 		Usage: "Enable interconnecting multiple zones.",
@@ -1310,6 +1390,11 @@ var K8sFlags = []cli.Flag{
 		Usage:       "the absolute path to the Kubernetes API CA certificate (not required if --k8s-kubeconfig is given)",
 		Destination: &cliConfig.Kubernetes.CACert,
 	},
+	&cli.StringFlag{
+		Name:        "k8s-cacert-data",
+		Usage:       "the Base64 encoded Kubernetes API CA certificate data (not required if --k8s-kubeconfig is given)",
+		Destination: &cliConfig.Kubernetes.CACertData,
+	},
 	&cli.StringFlag{
 		Name:  "k8s-token",
 		Usage: "the Kubernetes API authentication token (not required if --k8s-kubeconfig is given)",
@@ -1876,8 +1961,46 @@ func setOVSExternalID(exec kexec.Interface, key, value string) error {
 	return nil
 }
 
+// reconcileKubernetesAuthFields ensures that if a config stage provides Token/TokenFile
+// or CACert/CACertData, stale values for any of these set by a previous stage are cleared.
+// This is required since any combination of these fields could be set by any stage
+// and might get overwritten only partially.
+func reconcileKubernetesAuthFields(k *KubernetesConfig, override *KubernetesConfig) {
+	// If this stage provided either Token or TokenFile, clear the other field
+	// not provided by this stage.
+	overrideHasToken := override.Token != ""
+	overrideHasTokenFile := override.TokenFile != ""
+
+	if overrideHasToken || overrideHasTokenFile {
+		if !overrideHasToken {
+			k.Token = ""
+		}
+		if !overrideHasTokenFile {
+			k.TokenFile = ""
+		}
+	}
+
+	// If this stage provided either CACert or CACertData, clear the other field
+	// not provided by this stage.
+	overrideHasCACert := override.CACert != ""
+	overrideHasCACertData := override.CACertData != ""
+
+	if overrideHasCACert || overrideHasCACertData {
+		if !overrideHasCACert {
+			k.CACert = ""
+		}
+		if !overrideHasCACertData {
+			k.CACertData = ""
+		}
+	}
+}
+
 func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath string, defaults *Defaults) error {
-	// token adn ca.crt may be from files mounted in container.
+	// values for token, cacert, kubeconfig, api-server may be found in several places.
+	// Priority order (highest first): OVS config, command line options, config file,
+	// environment variables, service account files
+
+	// token and ca.crt may be from files mounted in container.
 	saConfig := savedKubernetes
 	if data, err := os.ReadFile(filepath.Join(saPath, kubeServiceAccountFileToken)); err == nil {
 		saConfig.Token = string(data)
@@ -1891,16 +2014,13 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin
 		return err
 	}
 
-	// values for token, cacert, kubeconfig, api-server may be found in several places.
- // Priority order (highest first): OVS config, command line options, config file, - // environment variables, service account files - envConfig := savedKubernetes envVarsMap := map[string]string{ "Kubeconfig": "KUBECONFIG", "BootstrapKubeconfig": "BOOTSTRAP_KUBECONFIG", "CertDir": "CERT_DIR", "CACert": "K8S_CACERT", + "CACertData": "K8S_CACERT_DATA", "APIServer": "K8S_APISERVER", "Token": "K8S_TOKEN", "TokenFile": "K8S_TOKEN_FILE", @@ -1915,16 +2035,19 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin if err := overrideFields(&Kubernetes, &envConfig, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &envConfig) // Copy config file values over default values if err := overrideFields(&Kubernetes, &file.Kubernetes, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &file.Kubernetes) // And CLI overrides over config file and default values if err := overrideFields(&Kubernetes, &cli.Kubernetes, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &cli.Kubernetes) // Grab default values from OVS external IDs if defaults.K8sAPIServer { @@ -1945,8 +2068,15 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin return fmt.Errorf("kubernetes kubeconfig file %q not found", Kubernetes.Kubeconfig) } - if Kubernetes.CACert != "" { - bytes, err := os.ReadFile(Kubernetes.CACert) + if Kubernetes.CACert != "" || Kubernetes.CACertData != "" { + var bytes []byte + var err error + if Kubernetes.CACert != "" { + bytes, err = os.ReadFile(Kubernetes.CACert) + } else { + bytes, err = base64.StdEncoding.DecodeString(Kubernetes.CACertData) + } + if err != nil { return err } @@ -2300,6 +2430,115 @@ func buildClusterManagerConfig(cli, file *config) error { return nil } +// buildNoOverlayConfig updates NoOverlay config from config file only +// NoOverlay configuration is only available in config file, not via CLI flags +func buildNoOverlayConfig(file *config) error { + // Copy config file values over default values + if err := overrideFields(&NoOverlay, &file.NoOverlay, &savedNoOverlay); err != nil { + return err + } + + return nil +} + +// validateNoOverlayConfig validates the no-overlay configuration +func validateNoOverlayConfig() error { + // Validate transport option + if Default.Transport != types.NetworkTransportGeneve && Default.Transport != types.NetworkTransportNoOverlay { + return fmt.Errorf("invalid transport %q: must be %q or %q", Default.Transport, types.NetworkTransportGeneve, types.NetworkTransportNoOverlay) + } + + // If transport is no-overlay, validate required no-overlay options + if Default.Transport == types.NetworkTransportNoOverlay { + if !OVNKubernetesFeature.EnableRouteAdvertisements { + return fmt.Errorf("enable-route-advertisements must be true when transport=%q", types.NetworkTransportNoOverlay) + } + if NoOverlay.OutboundSNAT == "" { + return fmt.Errorf("outbound-snat is required when transport=no-overlay") + } + if NoOverlay.OutboundSNAT != NoOverlaySNATEnabled && NoOverlay.OutboundSNAT != NoOverlaySNATDisabled { + return fmt.Errorf("invalid outbound-snat %q: must be %q or %q", NoOverlay.OutboundSNAT, NoOverlaySNATEnabled, NoOverlaySNATDisabled) + } + + if NoOverlay.Routing == "" { + return fmt.Errorf("routing is required when transport=no-overlay") + } + if NoOverlay.Routing != NoOverlayRoutingManaged && NoOverlay.Routing != NoOverlayRoutingUnmanaged { + return fmt.Errorf("invalid routing %q: must be %q or 
%q", NoOverlay.Routing, NoOverlayRoutingManaged, NoOverlayRoutingUnmanaged) + } + + // If routing is managed, topology is required + if NoOverlay.Routing == NoOverlayRoutingManaged { + if ManagedBGP.Topology == "" { + return fmt.Errorf("topology is required when routing=managed") + } + if ManagedBGP.Topology != ManagedBGPTopologyFullMesh { + return fmt.Errorf("invalid topology %q: must be %q", ManagedBGP.Topology, ManagedBGPTopologyFullMesh) + } + } + } else { + // Warn if no-overlay or BGP config is specified but transport is not no-overlay + if NoOverlay.OutboundSNAT != "" || NoOverlay.Routing != "" { + klog.Warningf("[no-overlay] configuration specified but transport is %q; configuration will be ignored", Default.Transport) + } + } + + return nil +} + +// validateConfig performs all configuration validations after configs are built and completed. +// This is the centralized place called after completeConfig() that orchestrates all validations. +func validateConfig() error { + // Validate managed BGP configuration + if err := validateManagedBGPConfig(); err != nil { + return err + } + + // Validate no-overlay/transport configuration + if err := validateNoOverlayConfig(); err != nil { + return err + } + + return nil +} + +// buildManagedBGPConfig updates managed BGP config from config file only +// ManagedBGP configuration is only available in config file, not via CLI flags +func buildManagedBGPConfig(file *config) error { + // Copy config file values over default values + if err := overrideFields(&ManagedBGP, &file.ManagedBGP, &savedManagedBGP); err != nil { + return err + } + + return nil +} + +// validateManagedBGPConfig validates the managed BGP configuration +func validateManagedBGPConfig() error { + // Validate AS number is in valid range + // Valid AS numbers: 1-4294967295 (32-bit) + // Reserved ranges: + // 0 - Reserved (RFC 7607) + // 23456 - AS_TRANS (RFC 6793) + // 65535 - Reserved (RFC 7300) + // 4294967295 - Reserved (RFC 7300) + + if ManagedBGP.ASNumber == 0 { + return fmt.Errorf("invalid as-number: 0 is reserved") + } + if ManagedBGP.ASNumber == 23456 { + return fmt.Errorf("invalid as-number: 23456 is reserved (AS_TRANS for 16-bit to 32-bit AS translation)") + } + if ManagedBGP.ASNumber == 65535 { + return fmt.Errorf("invalid as-number: 65535 is reserved") + } + if ManagedBGP.ASNumber == 4294967295 { + return fmt.Errorf("invalid as-number: 4294967295 is reserved") + } + + return nil +} + // completeClusterManagerConfig completes the ClusterManager config by parsing raw values // into their final form. func completeClusterManagerConfig(allSubnets *ConfigSubnets) error { @@ -2352,6 +2591,7 @@ func buildDefaultConfig(cli, file *config) error { if Default.Zone == "" { Default.Zone = types.OvnDefaultZone } + return nil } @@ -2432,6 +2672,7 @@ func stripTokenFromK8sConfig() KubernetesConfig { // Token and CAData are sensitive fields so stripping // them while logging. 
k8sConf.Token = "" + k8sConf.CACertData = "" k8sConf.CAData = []byte{} return k8sConf } @@ -2462,6 +2703,8 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d OvnKubeNode: savedOvnKubeNode, ClusterManager: savedClusterManager, OvsPaths: savedOvsPaths, + NoOverlay: savedNoOverlay, + ManagedBGP: savedManagedBGP, } configFile, configFileIsDefault = getConfigFilePath(ctx) @@ -2587,6 +2830,14 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d return "", err } + if err = buildNoOverlayConfig(&cfg); err != nil { + return "", err + } + + if err = buildManagedBGPConfig(&cfg); err != nil { + return "", err + } + tmpAuth, err := buildOvnAuth(exec, true, &cliConfig.OvnNorth, &cfg.OvnNorth, defaults.OvnNorthAddress) if err != nil { return "", err @@ -2603,6 +2854,11 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d return "", err } + // Perform cross-configuration validations + if err := validateConfig(); err != nil { + return "", err + } + klog.V(5).Infof("Features config: %+v", OVNKubernetesFeature) klog.V(5).Infof("Default config: %+v", Default) klog.V(5).Infof("Logging config: %+v", Logging) @@ -2617,6 +2873,8 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d klog.V(5).Infof("Ovnkube Node config: %+v", OvnKubeNode) klog.V(5).Infof("Ovnkube Cluster Manager config: %+v", ClusterManager) klog.V(5).Infof("OVS Paths config: %+v", OvsPaths) + klog.V(5).Infof("No Overlay config: %+v", NoOverlay) + klog.V(5).Infof("Managed BGP config: %+v", ManagedBGP) return retConfigFile, nil } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 6127dff90e..2a108e39d6 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -2121,4 +2121,255 @@ udn-allowed-default-services= ns/svc, ns1/svc1 gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) + + Describe("No-Overlay Configuration", func() { + BeforeEach(func() { + err := PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Enable route advertisements - required for no-overlay transport + OVNKubernetesFeature.EnableRouteAdvertisements = true + }) + + It("validates transport option correctly", func() { + // Test valid geneve transport + Default.Transport = types.NetworkTransportGeneve + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid no-overlay transport with required options + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid transport + Default.Transport = "invalid-transport" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid transport")) + }) + + It("requires outbound-snat when transport is no-overlay", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = "" + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("outbound-snat is required")) + }) + + It("validates outbound-snat values", func() { + Default.Transport = 
types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + + // Test valid enable + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid disable + NoOverlay.OutboundSNAT = NoOverlaySNATDisabled + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + NoOverlay.OutboundSNAT = "maybe" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid outbound-snat")) + }) + + It("requires routing when transport is no-overlay", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = "" + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("routing is required")) + }) + + It("validates routing values", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + + // Test valid managed (requires topology) + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid unmanaged (topology not required) + NoOverlay.Routing = NoOverlayRoutingUnmanaged + ManagedBGP.Topology = "" + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + NoOverlay.Routing = "automatic" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid routing")) + }) + + It("builds no-overlay config from file only", func() { + fileConfig := config{ + NoOverlay: NoOverlayConfig{ + OutboundSNAT: NoOverlaySNATEnabled, + Routing: NoOverlayRoutingManaged, + }, + ManagedBGP: ManagedBGPConfig{ + Topology: ManagedBGPTopologyFullMesh, + }, + } + err := buildNoOverlayConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + err = buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Config file values should be applied + gomega.Expect(NoOverlay.OutboundSNAT).To(gomega.Equal(NoOverlaySNATEnabled)) + gomega.Expect(NoOverlay.Routing).To(gomega.Equal(NoOverlayRoutingManaged)) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + }) + + It("requires topology when routing is managed", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = "" + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("topology is required when routing=managed")) + }) + + It("validates topology values", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + + // Test valid full-mesh + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + ManagedBGP.Topology = "route-reflector" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + 
gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid topology")) + gomega.Expect(err.Error()).To(gomega.ContainSubstring(`must be "full-mesh"`)) + }) + + It("does not require topology when routing is unmanaged", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingUnmanaged + ManagedBGP.Topology = "" + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) + + Describe("BGP Configuration", func() { + BeforeEach(func() { + err := PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + It("parses BGP config from file with all fields set", func() { + fileConfig := config{ + ManagedBGP: ManagedBGPConfig{ + Topology: ManagedBGPTopologyFullMesh, + ASNumber: 64500, + }, + } + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64500))) + }) + + It("handles partial BGP config in file", func() { + fileConfig := config{ + ManagedBGP: savedManagedBGP, + } + fileConfig.ManagedBGP.Topology = ManagedBGPTopologyFullMesh + + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + // ASNumber should retain default value from init + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64512))) + }) + + It("handles empty BGP config in file", func() { + fileConfig := config{ + ManagedBGP: savedManagedBGP, + } + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Should retain default values without panicking + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64512))) // default value + }) + + It("validates reserved AS number 0", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 0 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("0 is reserved")) + }) + + It("validates reserved AS number 23456 (AS_TRANS)", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 23456 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("23456 is reserved")) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("AS_TRANS")) + }) + + It("validates reserved AS number 65535", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 65535 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("65535 is reserved")) + }) + + It("validates reserved AS number 4294967295", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 4294967295 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("4294967295 is reserved")) + }) + + It("accepts valid AS numbers", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + + // 
Test valid 16-bit AS number + ManagedBGP.ASNumber = 64500 + err := validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test default AS number + ManagedBGP.ASNumber = 64512 + err = validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid 32-bit AS number + ManagedBGP.ASNumber = 100000 + err = validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) }) diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index 61a342f77a..eac0c84651 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -92,15 +92,17 @@ func (cm *ControllerManager) NewNetworkController(nInfo util.NetInfo) (networkma return nil, fmt.Errorf("topology type %s not supported", topoType) } -// newDummyNetworkController creates a dummy network controller used to clean up specific network -func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) (networkmanager.NetworkController, error) { +// newDummyNetworkController creates a dummy network controller used to clean up specific network. +// role is the NetworkRoleExternalID from stale OVN entities (e.g. "primary" or "secondary") so that +// the dummy's netInfo.IsPrimaryNetwork() is correct for Layer2 gateway cleanup. +func (cm *ControllerManager) newDummyNetworkController(topoType, netName, role string) (networkmanager.NetworkController, error) { // Pass a shallow clone of the watch factory, this allows multiplexing // informers for user-defined Networks. cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory.ShallowClone()) if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } - netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType}) + netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType, Role: role}) switch topoType { case ovntypes.Layer3Topology: return ovn.NewLayer3UserDefinedNetworkController(cnci, netInfo, cm.networkManager.Interface(), cm.routeImportManager, cm.eIPController, cm.portCache) @@ -112,33 +114,38 @@ func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) return nil, fmt.Errorf("topology type %s not supported", topoType) } -// Find all the OVN logical switches/routers for the secondary networks -func findAllSecondaryNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, +// findAllUserDefinedNetworkLogicalEntities returns all OVN logical switches and +// routers that belong to user-defined networks (primary or secondary). Same +// predicate as original: entities have NetworkExternalID and NetworkRoleExternalID +// (TopologyExternalID always co-exists with NetworkExternalID per CleanupStaleNetworks). +// Caller reads role and topoType from entity ExternalIDs for dummy controller creation. +// Used on controller restart to remove stale entities for deleted UDNs. 
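+// For instance, a stale switch left behind by a deleted layer2 primary UDN
+// would match through ExternalIDs along these lines (illustrative key
+// spellings; the code matches on the ovntypes.*ExternalID constants):
+//   {"k8s.ovn.org/network": "net1", "k8s.ovn.org/topology": "layer2",
+//    "k8s.ovn.org/network-role": "primary"}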
+func findAllUserDefinedNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, []*nbdb.LogicalRouter, error) { - belongsToSecondaryNetwork := func(externalIDs map[string]string) bool { + belongsToUserDefinedNetwork := func(externalIDs map[string]string) bool { _, hasNetworkExternalID := externalIDs[ovntypes.NetworkExternalID] - networkRole, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] - return hasNetworkExternalID && hasNetworkRoleExternalID && networkRole == ovntypes.NetworkRoleSecondary + _, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] + return hasNetworkExternalID && hasNetworkRoleExternalID } p1 := func(item *nbdb.LogicalSwitch) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - nodeSwitches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) if err != nil { - klog.Errorf("Failed to get all logical switches of secondary network error: %v", err) + klog.Errorf("Failed to get all logical switches of user-defined networks: %v", err) return nil, nil, err } p2 := func(item *nbdb.LogicalRouter) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - clusterRouters, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) if err != nil { - klog.Errorf("Failed to get all distributed logical routers: %v", err) + klog.Errorf("Failed to get all logical routers of user-defined networks: %v", err) return nil, nil, err } - return nodeSwitches, clusterRouters, nil + return switches, routers, nil } func (cm *ControllerManager) GetDefaultNetworkController() networkmanager.ReconcilableNetworkController { @@ -155,8 +162,9 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) } } - // Get all the existing secondary networks and its logical entities - switches, routers, err := findAllSecondaryNetworkLogicalEntities(cm.nbClient) + // Get all the existing user-defined network logical entities (primary and secondary). + // For a given network, all switches/routers have the same role external ID (primary or secondary). 
+ switches, routers, err := findAllUserDefinedNetworkLogicalEntities(cm.nbClient) if err != nil { return err } @@ -170,11 +178,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := ls.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } for _, lr := range routers { @@ -185,11 +197,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := lr.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/spec.go b/go-controller/pkg/crd/userdefinednetwork/v1/spec.go index cd65f08223..b4fc651575 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/spec.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/spec.go @@ -17,6 +17,16 @@ func (s *UserDefinedNetworkSpec) GetLocalnet() *LocalnetConfig { return nil } +func (s *UserDefinedNetworkSpec) GetTransport() TransportOption { + // UDN (namespace-scoped) does not support EVPN transport + return "" +} + +func (s *UserDefinedNetworkSpec) GetEVPN() *EVPNConfig { + // UDN (namespace-scoped) does not support EVPN + return nil +} + func (s *NetworkSpec) GetTopology() NetworkTopology { return s.Topology } @@ -32,3 +42,11 @@ func (s *NetworkSpec) GetLayer2() *Layer2Config { func (s *NetworkSpec) GetLocalnet() *LocalnetConfig { return s.Localnet } + +func (s *NetworkSpec) GetTransport() TransportOption { + return s.Transport +} + +func (s *NetworkSpec) GetEVPN() *EVPNConfig { + return s.EVPN +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index da58521b1f..ff43fd8476 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -98,6 +98,8 @@ import ( userdefinednetworkscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/scheme" userdefinednetworkapiinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions" userdefinednetworkinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions/userdefinednetwork/v1" + vtepinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -129,6 +131,7 @@ type WatchFactory struct { raFactory routeadvertisementsinformerfactory.SharedInformerFactory frrFactory frrinformerfactory.SharedInformerFactory networkQoSFactory networkqosinformerfactory.SharedInformerFactory + vtepFactory vtepinformerfactory.SharedInformerFactory informers map[reflect.Type]*informer stopChan chan struct{} @@ -158,6 +161,7 @@ func (wf *WatchFactory) ShallowClone() *WatchFactory { raFactory: wf.raFactory, frrFactory: wf.frrFactory, networkQoSFactory: wf.networkQoSFactory, + vtepFactory: wf.vtepFactory, informers: wf.informers, stopChan: wf.stopChan, @@ -281,6 +285,13 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory } } + // Initialize VTEP factory for EVPN support in combined mode (cluster-manager + ovnkube-controller). + if util.IsEVPNEnabled() { + wf.vtepFactory = vtepinformerfactory.NewSharedInformerFactory(ovnClientset.VTEPClient, resyncInterval) + // make sure shared informer is created for a factory, so on wf.vtepFactory.Start() it is initialized and caches are synced. + wf.vtepFactory.K8s().V1().VTEPs().Informer() + } + return wf, nil } @@ -646,6 +657,13 @@ func (wf *WatchFactory) Start() error { } } + if wf.vtepFactory != nil { + wf.vtepFactory.Start(wf.stopChan) + if err := waitForCacheSyncWithTimeout(wf.vtepFactory, wf.stopChan); err != nil { + return err + } + } + if wf.raFactory != nil { wf.raFactory.Start(wf.stopChan) if err := waitForCacheSyncWithTimeout(wf.raFactory, wf.stopChan); err != nil { @@ -706,6 +724,10 @@ func (wf *WatchFactory) Stop() { wf.cncFactory.Shutdown() } + if wf.vtepFactory != nil { + wf.vtepFactory.Shutdown() + } + if wf.raFactory != nil { wf.raFactory.Shutdown() } @@ -1081,6 +1103,13 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset wf.iFactory.Core().V1().Pods().Informer() } + // Initialize VTEP factory for EVPN support. + if util.IsEVPNEnabled() { + wf.vtepFactory = vtepinformerfactory.NewSharedInformerFactory(ovnClientset.VTEPClient, resyncInterval) + // make sure shared informer is created for a factory, so on wf.vtepFactory.Start() it is initialized and caches are synced. 
+ wf.vtepFactory.K8s().V1().VTEPs().Informer() + } + if util.IsNetworkConnectEnabled() { wf.cncFactory = networkconnectinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkConnectClient, resyncInterval) wf.informers[ClusterNetworkConnectType], err = newQueuedInformer(eventQueueSize, @@ -1821,6 +1850,10 @@ func (wf *WatchFactory) ClusterNetworkConnectInformer() networkconnectinformer.C return wf.cncFactory.K8s().V1().ClusterNetworkConnects() } +func (wf *WatchFactory) VTEPInformer() vtepinformer.VTEPInformer { + return wf.vtepFactory.K8s().V1().VTEPs() +} + func (wf *WatchFactory) DNSNameResolverInformer() ocpnetworkinformerv1alpha1.DNSNameResolverInformer { return wf.dnsFactory.Network().V1alpha1().DNSNameResolvers() } diff --git a/go-controller/pkg/libovsdb/ops/chassis.go b/go-controller/pkg/libovsdb/ops/chassis.go index 83a2d6a3c2..4af8bc58b1 100644 --- a/go-controller/pkg/libovsdb/ops/chassis.go +++ b/go-controller/pkg/libovsdb/ops/chassis.go @@ -2,6 +2,9 @@ package ops import ( "context" + "fmt" + + "github.com/google/uuid" "k8s.io/apimachinery/pkg/util/sets" @@ -171,3 +174,19 @@ func CreateOrUpdateChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis return nil } + +// validateRequestedChassisOption is a guard to ensure a caller is using the chassis-id (uuid format) +// for the requested chassis option. +func validateRequestedChassisOption(options map[string]string) error { + if len(options) == 0 { + return nil + } + chassisID, ok := options[RequestedChassis] + if !ok || chassisID == "" { + return nil + } + if _, err := uuid.Parse(chassisID); err != nil { + return fmt.Errorf("requested-chassis must be a valid UUID, got %q", chassisID) + } + return nil +} diff --git a/go-controller/pkg/libovsdb/ops/db_object_types.go b/go-controller/pkg/libovsdb/ops/db_object_types.go index 0a31560d1f..375f845ef8 100644 --- a/go-controller/pkg/libovsdb/ops/db_object_types.go +++ b/go-controller/pkg/libovsdb/ops/db_object_types.go @@ -250,9 +250,10 @@ var ACLNetworkPolicyPortIndex = newObjectIDsType(acl, NetworkPolicyPortIndexOwne // ingress/egress + NetworkPolicy[In/E]gressRule idx - defines given gressPolicy. 
// ACLs are created for gp.portPolicies which are grouped by protocol: // - for empty policy (no selectors and no ip blocks) - empty ACL (see allIPsMatch) +// with idx=emptyIdx (-1) // OR -// - all selector-based peers ACL -// - for every IPBlock +1 ACL +// - all selector-based peers ACL with idx=emptyIdx (-1) +// - all ipBlocks combined into a single ACL with idx=ipBlockCombinedIdx (-2) // Therefore unique id for a given gressPolicy is protocol name + IPBlock idx // (protocol will be "None" if no port policy is defined, and empty policy and all // selector-based peers ACLs will have idx=-1) diff --git a/go-controller/pkg/libovsdb/ops/router.go b/go-controller/pkg/libovsdb/ops/router.go index 8266bc34dd..3dc443bef3 100644 --- a/go-controller/pkg/libovsdb/ops/router.go +++ b/go-controller/pkg/libovsdb/ops/router.go @@ -187,6 +187,9 @@ func CreateOrUpdateLogicalRouterPort(nbClient libovsdbclient.Client, router *nbd // and returns the corresponding ops func CreateOrUpdateLogicalRouterPortOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, router *nbdb.LogicalRouter, lrp *nbdb.LogicalRouterPort, chassis *nbdb.GatewayChassis, fields ...interface{}) ([]ovsdb.Operation, error) { + if err := validateRequestedChassisOption(lrp.Options); err != nil { + return nil, err + } opModels := []operationModel{} if chassis != nil { opModels = append(opModels, operationModel{ diff --git a/go-controller/pkg/libovsdb/ops/switch.go b/go-controller/pkg/libovsdb/ops/switch.go index 4136f96bba..ff0216bd03 100644 --- a/go-controller/pkg/libovsdb/ops/switch.go +++ b/go-controller/pkg/libovsdb/ops/switch.go @@ -323,6 +323,9 @@ func createOrUpdateLogicalSwitchPortsOps(nbClient libovsdbclient.Client, ops []o opModels := make([]operationModel, 0, len(lsps)+1) for _, lsp := range lsps { + if err := validateRequestedChassisOption(lsp.Options); err != nil { + return nil, err + } opModel := createOrUpdateLogicalSwitchPortOpModelWithCustomFields(sw, lsp, createLSP, customFields) opModels = append(opModels, opModel) } @@ -480,38 +483,3 @@ func DeleteLogicalSwitchPortsWithPredicateOps(nbClient libovsdbclient.Client, op m := newModelClient(nbClient) return m.DeleteOps(ops, opModels...) 
} - -// UpdateLogicalSwitchPortSetOptions sets options on the provided logical switch -// port adding any missing, removing the ones set to an empty value and updating -// existing -func UpdateLogicalSwitchPortSetOptions(nbClient libovsdbclient.Client, lsp *nbdb.LogicalSwitchPort) error { - options := lsp.Options - lsp, err := GetLogicalSwitchPort(nbClient, lsp) - if err != nil { - return err - } - - if lsp.Options == nil { - lsp.Options = map[string]string{} - } - - for k, v := range options { - if v == "" { - delete(lsp.Options, k) - } else { - lsp.Options[k] = v - } - } - - opModel := operationModel{ - // For LSP's Name is a valid index, so no predicate is needed - Model: lsp, - OnModelUpdates: []interface{}{&lsp.Options}, - ErrNotFound: true, - BulkOp: false, - } - - m := newModelClient(nbClient) - _, err = m.CreateOrUpdate(opModel) - return err -} diff --git a/go-controller/pkg/metrics/metrics.go b/go-controller/pkg/metrics/metrics.go index 89b0fa896f..e574a9c468 100644 --- a/go-controller/pkg/metrics/metrics.go +++ b/go-controller/pkg/metrics/metrics.go @@ -2,11 +2,9 @@ package metrics import ( "context" - "crypto/tls" "fmt" "io" "net/http" - "net/http/pprof" "os" "path" "regexp" @@ -16,11 +14,8 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - utilwait "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -417,26 +412,6 @@ func CheckPodRunsOnGivenNode(clientset kubernetes.Interface, labels []string, k8 strings.Join(labels, ","), k8sNodeName) } -// using the cyrpto/tls module's GetCertificate() callback function helps in picking up -// the latest certificate (due to cert rotation on cert expiry) -func getTLSServer(addr, certFile, privKeyFile string, handler http.Handler) *http.Server { - tlsConfig := &tls.Config{ - GetCertificate: func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) { - cert, err := tls.LoadX509KeyPair(certFile, privKeyFile) - if err != nil { - return nil, fmt.Errorf("error generating x509 certs for metrics TLS endpoint: %v", err) - } - return &cert, nil - }, - } - server := &http.Server{ - Addr: addr, - Handler: handler, - TLSConfig: tlsConfig, - } - return server -} - // stringFlagSetterFunc is a func used for setting string type flag. type stringFlagSetterFunc func(string) (string, error) @@ -482,25 +457,27 @@ func writePlainText(statusCode int, text string, w http.ResponseWriter) { fmt.Fprintln(w, text) } -// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected -// It puts the endpoint behind TLS if certFile and keyFile are defined. +// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected. +// It now reuses the unified MetricServer implementation so it can share plumbing with the +// OVN/OVS metrics server. TLS and pprof behaviour remain unchanged. 
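+// A typical call site would wire it to the process stop channel (a sketch;
+// the bind address is illustrative, and empty cert/key paths disable TLS):
+//
+//	var wg sync.WaitGroup
+//	stop := make(chan struct{})
+//	StartMetricsServer("127.0.0.1:9409", false, "", "", stop, &wg)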
func StartMetricsServer(bindAddress string, enablePprof bool, certFile string, keyFile string, stopChan <-chan struct{}, wg *sync.WaitGroup) { - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - - if enablePprof { - mux.HandleFunc("/debug/pprof/", pprof.Index) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", pprof.Profile) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", pprof.Trace) - - // Allow changes to log level at runtime - mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter)) + opts := MetricServerOptions{ + BindAddress: bindAddress, + CertFile: certFile, + KeyFile: keyFile, + EnablePprof: enablePprof, + // Use default registry so existing metric registrations keep working. + Registerer: prometheus.DefaultRegisterer, } - startMetricsServer(bindAddress, certFile, keyFile, mux, stopChan, wg) + server := NewMetricServer(opts, nil, nil) + + wg.Add(1) + go func() { + defer wg.Done() + server.Run(stopChan) + }() } // StartOVNMetricsServer runs the prometheus listener so that OVN metrics can be collected @@ -522,40 +499,3 @@ func StartOVNMetricsServer(opts MetricServerOptions, return metricsServer } - -func startMetricsServer(bindAddress, certFile, keyFile string, handler http.Handler, stopChan <-chan struct{}, wg *sync.WaitGroup) { - var server *http.Server - wg.Add(1) - go func() { - defer wg.Done() - utilwait.Until(func() { - klog.Infof("Starting metrics server at address %q", bindAddress) - var listenAndServe func() error - if certFile != "" && keyFile != "" { - server = getTLSServer(bindAddress, certFile, keyFile, handler) - listenAndServe = func() error { return server.ListenAndServeTLS("", "") } - } else { - server = &http.Server{Addr: bindAddress, Handler: handler} - listenAndServe = func() error { return server.ListenAndServe() } - } - - errCh := make(chan error) - go func() { - errCh <- listenAndServe() - }() - var err error - select { - case err = <-errCh: - err = fmt.Errorf("failed while running metrics server at address %q: %w", bindAddress, err) - utilruntime.HandleError(err) - case <-stopChan: - klog.Infof("Stopping metrics server at address %q", bindAddress) - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := server.Shutdown(shutdownCtx); err != nil { - klog.Errorf("Error stopping metrics server at address %q: %v", bindAddress, err) - } - } - }, 5*time.Second, stopChan) - }() -} diff --git a/go-controller/pkg/metrics/ovn.go b/go-controller/pkg/metrics/ovn.go index a243aec0ee..cb266f2026 100644 --- a/go-controller/pkg/metrics/ovn.go +++ b/go-controller/pkg/metrics/ovn.go @@ -364,7 +364,7 @@ func updateSBDBConnectionMetric(ovsAppctl ovsClient) { } // RegisterOvnControllerMetrics registers the ovn-controller metrics -func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry *prometheus.Registry) { +func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry prometheus.Registerer) { getOvnControllerVersionInfo() ovnRegistry.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ diff --git a/go-controller/pkg/metrics/ovn_db.go b/go-controller/pkg/metrics/ovn_db.go index e42fa1be3f..7a9cfdd0f7 100644 --- a/go-controller/pkg/metrics/ovn_db.go +++ b/go-controller/pkg/metrics/ovn_db.go @@ -359,7 +359,7 @@ func getOvnDbVersionInfo() { } } -func RegisterOvnDBMetrics(ovnRegistry *prometheus.Registry) ([]*util.OvsDbProperties, bool, bool) { +func 
RegisterOvnDBMetrics(ovnRegistry prometheus.Registerer) ([]*util.OvsDbProperties, bool, bool) { // get the ovsdb server version info getOvnDbVersionInfo() // register metrics that will be served off of /metrics path diff --git a/go-controller/pkg/metrics/server.go b/go-controller/pkg/metrics/server.go index 88d04ce2a4..ca5a23680a 100644 --- a/go-controller/pkg/metrics/server.go +++ b/go-controller/pkg/metrics/server.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net/http" + "net/http/pprof" "time" "github.com/prometheus/client_golang/prometheus" @@ -35,11 +36,15 @@ type MetricServerOptions struct { EnableOVNDBMetrics bool EnableOVNControllerMetrics bool EnableOVNNorthdMetrics bool + EnablePprof bool // OnFatalError is called when an unrecoverable error occurs (e.g., failed to bind to address). // If set, it allows the caller to trigger a graceful shutdown. OnFatalError func() + // Prometheus plumbing + Registerer prometheus.Registerer + // Kubernetes integration K8sClient kubernetes.Interface K8sNodeName string @@ -63,35 +68,48 @@ type MetricServer struct { server *http.Server mux *http.ServeMux - // Prometheus registries - ovnRegistry *prometheus.Registry + // Prometheus registry + registerer prometheus.Registerer } // NewMetricServer creates a new MetricServer instance func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client, kubeClient kubernetes.Interface) *MetricServer { - // Create server instance + registerer := opts.Registerer + if registerer == nil { + registerer = prometheus.NewRegistry() + } + server := &MetricServer{ opts: opts, ovsDBClient: ovsDBClient, - ovnRegistry: prometheus.NewRegistry(), + registerer: registerer, kubeClient: kubeClient, } server.mux = http.NewServeMux() - metricsHandler := promhttp.HandlerForTransactional( - prometheus.ToTransactionalGatherer(server.ovnRegistry), - promhttp.HandlerOpts{}, - ) + tg := prometheus.ToTransactionalGatherer(server.registerer.(prometheus.Gatherer)) + metricsHandler := promhttp.HandlerForTransactional(tg, promhttp.HandlerOpts{}) + server.mux.Handle("/metrics", promhttp.InstrumentMetricHandler( - server.ovnRegistry, + server.registerer, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Update metrics in the registry before emitting them. server.handleMetrics(r) - // Emit the updated metrics using the transactional handler. 
 			metricsHandler.ServeHTTP(w, r)
 		}),
 	))
+	if opts.EnablePprof {
+		server.mux.HandleFunc("/debug/pprof/", pprof.Index)
+		server.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+		server.mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+		server.mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+		server.mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+
+		// Allow changes to log level at runtime
+		server.mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter))
+	}
+
 	return server
 }
@@ -99,32 +117,32 @@ func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client
 func (s *MetricServer) registerMetrics() {
 	if s.opts.EnableOVSMetrics {
 		klog.Infof("MetricServer registers OVS metrics")
-		registerOvsMetrics(s.ovsDBClient, s.ovnRegistry)
+		registerOvsMetrics(s.ovsDBClient, s.registerer)
 	}
 	if s.opts.EnableOVNDBMetrics {
 		klog.Infof("MetricServer registers OVN DB metrics")
-		s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry)
+		s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer)
 	}
 	if s.opts.EnableOVNControllerMetrics {
 		klog.Infof("MetricServer registers OVN Controller metrics")
-		RegisterOvnControllerMetrics(s.ovsDBClient, s.ovnRegistry)
+		RegisterOvnControllerMetrics(s.ovsDBClient, s.registerer)
 	}
 	if s.opts.EnableOVNNorthdMetrics {
 		klog.Infof("MetricServer registers OVN Northd metrics")
-		RegisterOvnNorthdMetrics(s.ovnRegistry)
+		RegisterOvnNorthdMetrics(s.registerer)
 	}
 }
 
 func (s *MetricServer) EnableOVNNorthdMetrics() {
 	s.opts.EnableOVNNorthdMetrics = true
 	klog.Infof("MetricServer registers OVN Northd metrics")
-	RegisterOvnNorthdMetrics(s.ovnRegistry)
+	RegisterOvnNorthdMetrics(s.registerer)
 }
 
 func (s *MetricServer) EnableOVNDBMetrics() {
 	s.opts.EnableOVNDBMetrics = true
 	klog.Infof("MetricServer registers OVN DB metrics")
-	s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry)
+	s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer)
 }
 
 // updateOvsMetrics updates the OVS metrics
@@ -226,6 +244,7 @@ func (s *MetricServer) Run(stopChan <-chan struct{}) {
 	errCh := make(chan error)
 	go func() {
+		klog.Infof("Starting metrics server at address %q", s.opts.BindAddress)
 		errCh <- listenAndServe()
 	}()
 
diff --git a/go-controller/pkg/metrics/server_test.go b/go-controller/pkg/metrics/server_test.go
index 32d4144dd8..c02bfd1188 100644
--- a/go-controller/pkg/metrics/server_test.go
+++ b/go-controller/pkg/metrics/server_test.go
@@ -13,6 +13,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/spf13/afero"
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
@@ -48,7 +49,7 @@ func TestNewMetricServerRunAndShutdown(t *testing.T) {
 	server := NewMetricServer(opts, ovsDBClient, kubeClient)
 	require.NotNil(t, server, "Server should not be nil")
 	require.NotNil(t, server.mux, "Server mux should not be nil")
-	require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil")
+	require.NotNil(t, server.registerer, "Server registerer should not be nil")
 
 	// Start server in background
 	serverDone := make(chan struct{})
@@ -109,7 +110,7 @@ func TestNewMetricServerRunAndFailOnFatalError(t *testing.T) {
 	server := NewMetricServer(opts, ovsDBClient, kubeClient)
 	require.NotNil(t, server, "Server should not be nil")
 	require.NotNil(t, server.mux, "Server mux should not be nil")
-
require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil") + require.NotNil(t, server.registerer, "Server registerer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -316,6 +317,7 @@ type metricsTestCase struct { enableOVNDB bool enableOVNController bool enableOVNNorthd bool + registerer prometheus.Registerer mockRunCommands []ovntest.TestifyMockHelper expectedMetrics []string } @@ -379,6 +381,11 @@ func TestHandleMetrics(t *testing.T) { } defer libovsdbCleanup.Cleanup() + // Register OVN-Kube controller base metrics into the default registry, so the + // metrics in default registry can be tested. + RegisterOVNKubeControllerBase() + MetricOVNKubeControllerSyncDuration.WithLabelValues("pods").Set(0) + testCases := []metricsTestCase{ { name: "OVS metrics", @@ -778,6 +785,56 @@ func TestHandleMetrics(t *testing.T) { "promhttp_metric_handler_requests_total", }, }, + { + name: "default registry metrics", + registerer: prometheus.DefaultRegisterer, + expectedMetrics: []string{ + "ovnkube_controller_leader", + "ovnkube_controller_ready_duration_seconds", + "ovnkube_controller_sync_duration_seconds", + "ovnkube_controller_build_info", + "go_gc_duration_seconds", + "go_gc_gogc_percent", + "go_gc_gomemlimit_bytes", + "go_goroutines", + "go_info", + "go_memstats_alloc_bytes", + "go_memstats_alloc_bytes_total", + "go_memstats_buck_hash_sys_bytes", + "go_memstats_frees_total", + "go_memstats_gc_sys_bytes", + "go_memstats_heap_alloc_bytes", + "go_memstats_heap_idle_bytes", + "go_memstats_heap_inuse_bytes", + "go_memstats_heap_objects", + "go_memstats_heap_released_bytes", + "go_memstats_heap_sys_bytes", + "go_memstats_last_gc_time_seconds", + "go_memstats_mallocs_total", + "go_memstats_mcache_inuse_bytes", + "go_memstats_mcache_sys_bytes", + "go_memstats_mspan_inuse_bytes", + "go_memstats_mspan_sys_bytes", + "go_memstats_next_gc_bytes", + "go_memstats_other_sys_bytes", + "go_memstats_stack_inuse_bytes", + "go_memstats_stack_sys_bytes", + "go_memstats_sys_bytes", + "go_sched_gomaxprocs_threads", + "go_threads", + "process_cpu_seconds_total", + "process_max_fds", + "process_network_receive_bytes_total", + "process_network_transmit_bytes_total", + "process_open_fds", + "process_resident_memory_bytes", + "process_start_time_seconds", + "process_virtual_memory_bytes", + "process_virtual_memory_max_bytes", + "promhttp_metric_handler_requests_in_flight", + "promhttp_metric_handler_requests_total", + }, + }, } for _, tc := range testCases { @@ -789,6 +846,7 @@ func TestHandleMetrics(t *testing.T) { EnableOVNDBMetrics: tc.enableOVNDB, EnableOVNControllerMetrics: tc.enableOVNController, EnableOVNNorthdMetrics: tc.enableOVNNorthd, + Registerer: tc.registerer, } // Mock the exec runner for RunOvsVswitchdAppCtl calls mockCmd := new(mock_k8s_io_utils_exec.Cmd) @@ -813,8 +871,8 @@ func TestHandleMetrics(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) server.registerMetrics() - // iterate s.ovnRegistry to list all registered metrics' names - regMetrics, err := server.ovnRegistry.Gather() + // Iterate server registry to list all registered metric names. 
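The long expectedMetrics list in the new "default registry metrics" case follows from client_golang behavior: prometheus.DefaultRegisterer ships with the Go runtime and process collectors pre-registered, whereas a fresh Registry starts empty. A sketch showing how the same go_*/process_* families appear on an empty registry once those collectors are added (standalone example, not project code):

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
)

func main() {
	// prometheus.DefaultRegisterer already carries the Go runtime and
	// process collectors; a fresh Registry only exposes the same
	// go_*/process_* families once we register them explicitly.
	reg := prometheus.NewRegistry()
	reg.MustRegister(
		collectors.NewGoCollector(),
		collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
	)

	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		// Prints families such as go_goroutines and process_open_fds,
		// matching the expectations in the test case above.
		fmt.Println(mf.GetName())
	}
}
```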

 		for _, tc := range testCases {
@@ -789,6 +846,7 @@ func TestHandleMetrics(t *testing.T) {
 			EnableOVNDBMetrics:         tc.enableOVNDB,
 			EnableOVNControllerMetrics: tc.enableOVNController,
 			EnableOVNNorthdMetrics:     tc.enableOVNNorthd,
+			Registerer:                 tc.registerer,
 		}
 		// Mock the exec runner for RunOvsVswitchdAppCtl calls
 		mockCmd := new(mock_k8s_io_utils_exec.Cmd)
@@ -813,8 +871,8 @@ func TestHandleMetrics(t *testing.T) {
 		server := NewMetricServer(opts, ovsDBClient, kubeClient)
 		server.registerMetrics()

-		// iterate s.ovnRegistry to list all registered metrics' names
-		regMetrics, err := server.ovnRegistry.Gather()
+		// Iterate the server registry to list all registered metric names.
+		regMetrics, err := server.registerer.(prometheus.Gatherer).Gather()
 		if err != nil {
 			t.Fatalf("Failed to gather metrics: %v", err)
 		}
diff --git a/go-controller/pkg/networkmanager/api.go b/go-controller/pkg/networkmanager/api.go
index 7c47997276..1581afcfa2 100644
--- a/go-controller/pkg/networkmanager/api.go
+++ b/go-controller/pkg/networkmanager/api.go
@@ -43,11 +43,13 @@ type watchFactory interface {
 // information to the rest of the project.
 type Interface interface {
 	// GetActiveNetworkForNamespace returns a copy of the primary network for
-	// the namespace if any or the default network otherwise. If there is a
-	// primary UDN defined but the NAD has not been processed yet, returns
-	// ErrNetworkControllerTopologyNotManaged. Used for controllers that are not
-	// capable of reconciling primary network changes. If unsure, use this one
-	// and not GetActiveNetworkForNamespaceFast.
+	// the namespace if any, or the default network otherwise.
+	// If the network is non-existent for a legitimate reason (namespace gone or
+	// filtered by Dynamic UDN), it returns a nil NetInfo and no error.
+	// If the network is non-existent but should exist, it returns InvalidPrimaryNetworkError.
+	// If unsure, use this one and not GetActiveNetworkForNamespaceFast.
+	// Note that this function applies Dynamic UDN filtering; callers that want NAD/Network
+	// information without D-UDN filtering should use GetPrimaryNADForNamespace.
 	GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error)

 	// GetActiveNetworkForNamespaceFast returns the primary network for the
@@ -61,6 +63,7 @@ type Interface interface {
 	// GetPrimaryNADForNamespace returns the full namespaced key of the
 	// primary NAD for the given namespace, if one exists.
 	// Returns default network if namespace has no primary UDN.
+	// This function is not filtered based on Dynamic UDN.
 	GetPrimaryNADForNamespace(namespace string) (string, error)

 	// GetNetwork returns the network of the given name or nil if unknown
diff --git a/go-controller/pkg/networkmanager/egressip_tracker.go b/go-controller/pkg/networkmanager/egressip_tracker.go
index be941b9bcd..9b86bb2cce 100644
--- a/go-controller/pkg/networkmanager/egressip_tracker.go
+++ b/go-controller/pkg/networkmanager/egressip_tracker.go
@@ -250,7 +250,7 @@ func (t *EgressIPTrackerController) reconcileNamespace(key string) error {
 	primaryNAD, err := t.primaryNADForNamespace(ns.Name)
 	if err != nil {
-		if util.IsUnprocessedActiveNetworkError(err) {
+		if util.IsInvalidPrimaryNetworkError(err) {
 			// Namespace requires a primary network but none exists yet; NAD controller will requeue.
 			return nil
 		}
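The rename from IsUnprocessedActiveNetworkError to IsInvalidPrimaryNetworkError used above relies on typed-error helpers whose definitions are not part of this diff. The usual shape for such a predicate is a dedicated error type plus errors.As; a sketch under that assumption (package and field names are mine, not the real pkg/util code):

```go
package example

import (
	"errors"
	"fmt"
)

// invalidPrimaryNetworkError is a guess at the shape of the helper used in
// this patch; the real definition lives elsewhere in pkg/util.
type invalidPrimaryNetworkError struct {
	namespace string
}

func (e *invalidPrimaryNetworkError) Error() string {
	return fmt.Sprintf("invalid primary network state for namespace %q", e.namespace)
}

func NewInvalidPrimaryNetworkError(namespace string) error {
	return &invalidPrimaryNetworkError{namespace: namespace}
}

// IsInvalidPrimaryNetworkError lets callers branch on the condition without
// string matching, as reconcileNamespace does above. errors.As also matches
// wrapped errors, so callers may freely add context with fmt.Errorf("%w").
func IsInvalidPrimaryNetworkError(err error) bool {
	var target *invalidPrimaryNetworkError
	return errors.As(err, &target)
}
```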
- return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } diff --git a/go-controller/pkg/networkmanager/egressip_tracker_test.go b/go-controller/pkg/networkmanager/egressip_tracker_test.go index e82d2b9813..68069b5e6a 100644 --- a/go-controller/pkg/networkmanager/egressip_tracker_test.go +++ b/go-controller/pkg/networkmanager/egressip_tracker_test.go @@ -278,6 +278,15 @@ func TestEgressIPTrackerControllerWithInformer(t *testing.T) { }, metav1.CreateOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) + // Mirror production ordering: NAD controller notifies registered reconcilers + // after the primary NAD is observed, so namespace reconcile isn't dropped due + // to a transient "primary not found" window in informer caches. + primaryNADKey := util.GetNADName(tt.namespace, "primary") + g.Eventually(func() (string, error) { + return tracker.primaryNADForNamespace(tt.namespace) + }, 2*time.Second, 100*time.Millisecond).Should(gomega.Equal(primaryNADKey)) + tracker.NADReconciler().Reconcile(primaryNADKey) + // Expect add events g.Eventually(func() []callbackEvent { gotMu.Lock() diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index b282535f93..29a2099c5c 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -31,7 +31,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" - utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" ) // nadController handles namespaced scoped NAD events and @@ -839,6 +838,10 @@ func (c *nadController) nadNeedsUpdate(oldNAD, newNAD *nettypes.NetworkAttachmen oldNAD.Annotations[types.OvnNetworkNameAnnotation] != newNAD.Annotations[types.OvnNetworkNameAnnotation] } +// GetActiveNetworkForNamespace attempts to get the netInfo of a primary active network where this OVNK instance is running. +// Returns DefaultNetwork if Network Segmentation disabled or namespace does not require primary UDN. +// Returns nil if there is no active network. +// Returns InvalidPrimaryNetworkError if a network should be present but is not. 
func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { if !util.IsNetworkSegmentationSupportEnabled() { return &util.DefaultNetInfo{}, nil @@ -847,6 +850,10 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net // check if required UDN label is on namespace ns, err := c.namespaceLister.Get(namespace) if err != nil { + if apierrors.IsNotFound(err) { + // namespace is gone, no active network for it + return nil, nil + } return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, err) } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; !exists { @@ -854,49 +861,26 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net return &util.DefaultNetInfo{}, nil } - network, nad := c.getActiveNetworkForNamespace(namespace) + // primary UDN territory, check if our NAD controller to see if it has processed the network and if the + // network manager has rendered the network + network, primaryNAD := c.getActiveNetworkForNamespace(namespace) if network != nil && network.IsPrimaryNetwork() { - // primary UDN found + // primary UDN network found in network controller copy := util.NewMutableNetInfo(network) - copy.SetNADs(nad) + copy.SetNADs(primaryNAD) return copy, nil } - // no primary UDN found, make sure we just haven't processed it yet and no UDN / CUDN exists - udns, err := c.udnLister.UserDefinedNetworks(namespace).List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("error getting user defined networks: %w", err) - } - for _, udn := range udns { - if utiludn.IsPrimaryNetwork(&udn.Spec) { - return nil, util.NewUnprocessedActiveNetworkError(namespace, udn.Name) - } - } - cudns, err := c.cudnLister.List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("failed to list CUDNs: %w", err) - } - for _, cudn := range cudns { - if !utiludn.IsPrimaryNetwork(&cudn.Spec.Network) { - continue - } - // check the subject namespace referred by the specified namespace-selector - cudnNamespaceSelector, err := metav1.LabelSelectorAsSelector(&cudn.Spec.NamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to convert CUDN %q namespaceSelector: %w", cudn.Name, err) - } - selectedNamespaces, err := c.namespaceLister.List(cudnNamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to list namespaces using selector %q: %w", cudnNamespaceSelector, err) - } - for _, ns := range selectedNamespaces { - if ns.Name == namespace { - return nil, util.NewUnprocessedActiveNetworkError(namespace, cudn.Name) - } + // no network exists in the network manager + if primaryNAD != "" { + if config.OVNKubernetesFeature.EnableDynamicUDNAllocation { + // primary NAD exists, no network, and DUDN is enabled, treat this like the network doesn't exist + return nil, nil } + // primary NAD exists, but missing in network manager. This should never happen. + panic(fmt.Sprintf("NAD Controller broken consistency with Network Manager for primary NAD: %s", primaryNAD)) } - // namespace has required UDN label, but no UDN was found return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -907,8 +891,11 @@ func (c *nadController) GetActiveNetworkForNamespaceFast(namespace string) util. // GetPrimaryNADForNamespace returns the full namespaced key of the // primary NAD for the given namespace, if one exists. 
-// Returns default network if namespace has no primary UDN +// Returns default network if namespace has no primary UDN or Network Segmentation is disabled func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, error) { + if !util.IsNetworkSegmentationSupportEnabled() { + return types.DefaultNetworkName, nil + } c.RLock() primary := c.primaryNADs[namespace] c.RUnlock() @@ -927,7 +914,7 @@ func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, err } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; exists { // Namespace promises a primary UDN, but we haven't cached one yet. - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } // No required label: means default network only. diff --git a/go-controller/pkg/networkmanager/nad_controller_test.go b/go-controller/pkg/networkmanager/nad_controller_test.go index 6062cb9eac..a794c07e01 100644 --- a/go-controller/pkg/networkmanager/nad_controller_test.go +++ b/go-controller/pkg/networkmanager/nad_controller_test.go @@ -178,12 +178,14 @@ func (tnc *testNetworkController) Start(context.Context) error { func (tnc *testNetworkController) Stop() { tnc.tcm.Lock() defer tnc.tcm.Unlock() + fmt.Printf("stopping network: %s\n", testNetworkKey(tnc)) tnc.tcm.stopped = append(tnc.tcm.stopped, testNetworkKey(tnc)) } func (tnc *testNetworkController) Cleanup() error { tnc.tcm.Lock() defer tnc.tcm.Unlock() + fmt.Printf("cleaning up network: %s\n", testNetworkKey(tnc)) tnc.tcm.cleaned = append(tnc.tcm.cleaned, testNetworkKey(tnc)) return nil } @@ -842,8 +844,20 @@ func TestNADController(t *testing.T) { g.Expect(err).ToNot(gomega.HaveOccurred()) netController := nadController.networkController - g.Expect(nadController.networkController.Start()).To(gomega.Succeed()) - defer nadController.networkController.Stop() + // Drive reconciliation only for networks touched by the NAD operation + // to avoid assertions against transient async queue states. 
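The test helper added just below replaces background-queue assertions (Eventually/Consistently) with explicit, synchronous reconciliation, which makes each assertion deterministic instead of racing against async workers. A stripped-down sketch of the pattern with a stand-in controller (none of these types are from the repo):

```go
package main

import "fmt"

// fakeController stands in for the nadController/networkController pair;
// the point here is the test pattern, not the real types.
type fakeController struct {
	state map[string]string
}

// syncNetwork reconciles one network synchronously, the way the test's
// syncTouchedNetworks helper drives netController.syncNetwork.
func (c *fakeController) syncNetwork(name string) error {
	c.state[name] = "synced"
	return nil
}

func main() {
	c := &fakeController{state: map[string]string{}}

	// Instead of polling with Eventually/Consistently against a background
	// queue, drive the reconcile explicitly and assert exactly once.
	for _, network := range []string{"blue", "red"} {
		if err := c.syncNetwork(network); err != nil {
			panic(err)
		}
	}
	fmt.Println(c.state) // map[blue:synced red:synced]
}
```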
+			// Drive reconciliation only for networks touched by the NAD operation
+			// to avoid assertions against transient async queue states.
+			syncTouchedNetworks := func(nadKey, prevNetwork string) {
+				networkNames := sets.New[string]()
+				if prevNetwork != "" {
+					networkNames.Insert(prevNetwork)
+				}
+				if currNetwork := nadController.nads[nadKey]; currNetwork != "" {
+					networkNames.Insert(currNetwork)
+				}
+				for _, network := range networkNames.UnsortedList() {
+					g.Expect(netController.syncNetwork(network)).To(gomega.Succeed())
+				}
+			}

 			for _, args := range tt.args {
 				namespace, name, err := cache.SplitMetaNamespaceKey(args.nad)
@@ -858,12 +872,14 @@ func TestNADController(t *testing.T) {
 					g.Expect(err).To(gomega.Or(gomega.Not(gomega.HaveOccurred()), gomega.MatchError(apierrors.IsAlreadyExists, "AlreadyExists")))
 				}

+				prevNetwork := nadController.nads[args.nad]
 				err = nadController.syncNAD(args.nad, nad)
 				if args.wantErr {
 					g.Expect(err).To(gomega.HaveOccurred())
 				} else {
 					g.Expect(err).NotTo(gomega.HaveOccurred())
 				}
+				syncTouchedNetworks(args.nad, prevNetwork)
 			}

 			meetsExpectations := func(g gomega.Gomega) {
@@ -943,8 +959,7 @@ func TestNADController(t *testing.T) {
 				}
 			}

-			g.Eventually(meetsExpectations).Should(gomega.Succeed())
-			g.Consistently(meetsExpectations).Should(gomega.Succeed())
+			meetsExpectations(g)
 		})
 	}
 }
diff --git a/go-controller/pkg/networkmanager/pod_tracker.go b/go-controller/pkg/networkmanager/pod_tracker.go
index 4a300dd099..3a682d41ed 100644
--- a/go-controller/pkg/networkmanager/pod_tracker.go
+++ b/go-controller/pkg/networkmanager/pod_tracker.go
@@ -191,7 +191,7 @@ func (c *PodTrackerController) getPrimaryNADForNamespaceFromLister(namespace str
 			return util.GetNADName(nad.Namespace, nad.Name), nil
 		}
 	}
-	return "", util.NewUnprocessedActiveNetworkError(namespace, "")
+	return "", util.NewInvalidPrimaryNetworkError(namespace)
 }

 // syncAll builds the cache on initial controller start
diff --git a/go-controller/pkg/node/base_node_network_controller_dpu.go b/go-controller/pkg/node/base_node_network_controller_dpu.go
index db79e35c39..cdcbae6c2f 100644
--- a/go-controller/pkg/node/base_node_network_controller_dpu.go
+++ b/go-controller/pkg/node/base_node_network_controller_dpu.go
@@ -104,6 +104,7 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error)
 	return bnnc.watchFactory.AddPodHandler(cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
 			var activeNetwork util.NetInfo
+			var err error
 			pod := obj.(*corev1.Pod)
 			klog.V(5).Infof("Add for Pod: %s/%s for network %s", pod.Namespace, pod.Name, netName)
@@ -116,22 +117,16 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error)
 			nadToDPUCDMap := map[string]*util.DPUConnectionDetails{}
 			if bnnc.IsUserDefinedNetwork() {
 				if bnnc.IsPrimaryNetwork() {
-					// check to see if the primary NAD is even applicable to our controller
-					foundNamespaceNAD, err := bnnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace)
+					activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace)
 					if err != nil {
-						klog.Errorf("Failed to get primary network NAD for namespace %s: %v", pod.Namespace, err)
-						return
-					}
-					if foundNamespaceNAD == types.DefaultNetworkName {
+						klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err)
 						return
 					}
-					networkName := bnnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD)
-					if networkName != "" && networkName != netName {
+					if activeNetwork == nil {
+						klog.Errorf("Unable to find an active network for namespace %s", pod.Namespace)
 						return
 					}
-					activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace)
-					if err != nil {
-						klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err)
+					if activeNetwork.GetNetworkName() != netName {
 						return
 					}
 				}
diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go
index 08726875a3..bcbe568ddb 100644
--- a/go-controller/pkg/node/controllers/egressip/egressip.go
+++ b/go-controller/pkg/node/controllers/egressip/egressip.go
@@ -567,6 +567,10 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin
 		if err != nil {
 			return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err)
 		}
+		if netInfo == nil {
+			// no active network
+			continue
+		}
 		if netInfo.IsUserDefinedNetwork() {
 			// EIP for secondary host interfaces is not supported for secondary networks
 			continue
@@ -1036,6 +1040,10 @@ func (c *Controller) repairNode() error {
 		if err != nil {
 			return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err)
 		}
+		if netInfo == nil {
+			// no active network
+			continue
+		}
 		if netInfo.IsUserDefinedNetwork() {
 			// EIP for secondary host interfaces is not supported for secondary networks
 			continue
@@ -1142,8 +1150,12 @@ func (c *Controller) migrateFromAddrLabelToAnnotation() error {
 		if err != nil {
 			return err
 		}
-		node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
-		return c.kube.UpdateNodeStatus(node)
+		nodeToUpdate := node.DeepCopy()
+		if nodeToUpdate.Annotations == nil {
+			nodeToUpdate.Annotations = map[string]string{}
+		}
+		nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
+		return c.kube.UpdateNodeStatus(nodeToUpdate)
 	})
 }
@@ -1174,8 +1186,12 @@ func (c *Controller) addIPToAnnotation(ip string) error {
 		if err != nil {
 			return err
 		}
-		node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
-		return c.kube.UpdateNodeStatus(node)
+		nodeToUpdate := node.DeepCopy()
+		if nodeToUpdate.Annotations == nil {
+			nodeToUpdate.Annotations = map[string]string{}
+		}
+		nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
+		return c.kube.UpdateNodeStatus(nodeToUpdate)
 	})
 }
@@ -1206,8 +1222,12 @@ func (c *Controller) deleteIPFromAnnotation(ip string) error {
 		if err != nil {
 			return err
 		}
-		node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
-		return c.kube.UpdateNodeStatus(node)
+		nodeToUpdate := node.DeepCopy()
+		if nodeToUpdate.Annotations == nil {
+			nodeToUpdate.Annotations = map[string]string{}
+		}
+		nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch)
+		return c.kube.UpdateNodeStatus(nodeToUpdate)
 	})
 }
diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index 25c1968ec5..18e0d4c7fe 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -703,60 +703,6 @@ func getOVNSBZone() (string, error) {
 	return dbZone, nil
 }

-/** HACK BEGIN **/
-// TODO(tssurya): Remove this HACK a few months from now.
-// checkOVNSBNodeLRSR returns true if the logical router static route for the
-// the given nodeSubnet is present in the SBDB
-func checkOVNSBNodeLRSR(nodeSubnet *net.IPNet) bool {
-	var matchv4, matchv6 string
-	v6 := true
-	v4 := true
-	if config.IPv6Mode && utilnet.IsIPv6CIDR(nodeSubnet) {
-		matchv6 = fmt.Sprintf("match=\"reg7 == 0 && ip6.dst == %s\"", nodeSubnet)
-		stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "logical_flow", matchv6)
-		klog.Infof("Upgrade Hack: checkOVNSBNodeLRSR for node - %s : match %s : stdout - %s : stderr - %s : err %v",
-			nodeSubnet, matchv6, stdout, stderr, err)
-		v6 = (err == nil && stderr == "" && stdout != "")
-	}
-	if config.IPv4Mode && !utilnet.IsIPv6CIDR(nodeSubnet) {
-		matchv4 = fmt.Sprintf("match=\"reg7 == 0 && ip4.dst == %s\"", nodeSubnet)
-		stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "logical_flow", matchv4)
-		klog.Infof("Upgrade Hack: checkOVNSBNodeLRSR for node - %s : match %s : stdout - %s : stderr - %s : err %v",
-			nodeSubnet, matchv4, stdout, stderr, err)
-		v4 = (err == nil && stderr == "" && stdout != "")
-	}
-	return v6 && v4
-}
-
-func fetchLBNames() string {
-	stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "name", "find", "Load_Balancer")
-	if err != nil || stderr != "" {
-		klog.Errorf("Upgrade hack: fetchLBNames could not fetch services %v/%v", err, stderr)
-		return stdout // will be empty and we will retry
-	}
-	klog.Infof("Upgrade Hack: fetchLBNames: stdout - %s : stderr - %s : err %v", stdout, stderr, err)
-	return stdout
-}
-
-// lbExists returns true if the OVN load balancer for the corresponding namespace/name
-// was created
-func lbExists(lbNames, namespace, name string) bool {
-	stitchedServiceName := "Service_" + namespace + "/" + name
-	match := strings.Contains(lbNames, stitchedServiceName)
-	klog.Infof("Upgrade Hack: lbExists for service - %s/%s/%s : match - %v",
-		namespace, name, stitchedServiceName, match)
-	return match
-}
-
-func portExists(namespace, name string) bool {
-	lspName := fmt.Sprintf("logical_port=%s", util.GetLogicalPortName(namespace, name))
-	stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "Port_Binding", lspName)
-	klog.Infof("Upgrade Hack: portExists for pod - %s/%s : stdout - %s : stderr - %s", namespace, name, stdout, stderr)
-	return err == nil && stderr == "" && stdout != ""
-}
-
-/** HACK END **/
-
 // Init executes the first steps to start the DefaultNodeNetworkController.
 // It is split from Start() and executed before UserDefinedNodeNetworkController (UDNNC)
 // to allow UDNNC to reference the openflow manager created in Init.
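On the Init/Start split described in the comment above: the point is that shared resources (here, the openflow manager) are created in an early phase so a dependent controller can be wired against them before any loops run. A generic sketch of that two-phase shape, with hypothetical types that only illustrate the ordering:

```go
package example

import "context"

// ofManager stands in for the openflow manager created during Init.
type ofManager struct{}

// DefaultController mimics the split described in the comment:
// Init builds shared resources, Start begins the control loops,
// and a dependent controller can be wired up in between.
type DefaultController struct {
	of *ofManager
}

func (c *DefaultController) Init(_ context.Context) error {
	// Create shared state early so dependents can reference it
	// before any loops are running.
	c.of = &ofManager{}
	return nil
}

// OpenflowManager exposes the shared resource to dependents such as
// a UserDefinedNodeNetworkController equivalent.
func (c *DefaultController) OpenflowManager() *ofManager { return c.of }

func (c *DefaultController) Start(_ context.Context) error {
	// Start loops that assume Init already ran and c.of is non-nil.
	_ = c.of
	return nil
}
```

Usage order matches the comment: call Init, construct the dependent controller against OpenflowManager(), then Start both.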
@@ -820,12 +766,9 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error {
 		return fmt.Errorf("timed out waiting for the node zone %s to match the OVN Southbound db zone, err: %v, err1: %v", config.Default.Zone, err, err1)
 	}

-	// if its nonIC OR IC=true and if its phase1 OR if its IC to IC upgrades
-	if !config.OVNKubernetesFeature.EnableInterconnect || sbZone == types.OvnDefaultZone || util.HasNodeMigratedZone(node) { // if its nonIC or if its phase1
-		for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} {
-			if err := auth.SetDBAuth(); err != nil {
-				return err
-			}
+	for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} {
+		if err := auth.SetDBAuth(); err != nil {
+			return err
 		}
 	}

@@ -956,17 +899,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 	klog.Infof("Starting the default node network controller")

 	var err error
-	var node *corev1.Node

 	if nc.mgmtPortController == nil {
 		return fmt.Errorf("default node network controller hasn't been pre-started")
 	}

-	if node, err = nc.watchFactory.GetNode(nc.name); err != nil {
-		return fmt.Errorf("error retrieving node %s: %v", nc.name, err)
-	}
-
-	nodeAnnotator := kube.NewNodeAnnotator(nc.Kube, node.Name)
 	waiter := newStartupWaiter()

 	// Complete gateway initialization
@@ -994,125 +931,6 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 		}
 	}

-	/** HACK BEGIN **/
-	// TODO(tssurya): Remove this HACK a few months from now. This has been added only to
-	// minimize disruption for upgrades when moving to interconnect=true.
-	// We want the legacy ovnkube-master to wait for remote ovnkube-node to
-	// signal it using "k8s.ovn.org/remote-zone-migrated" annotation before
-	// considering a node as remote when we upgrade from "global" (1 zone IC)
-	// zone to multi-zone. This is so that network disruption for the existing workloads
-	// is negligible and until the point where ovnkube-node flips the switch to connect
-	// to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure
-	// OVN/OVS flows are intact.
-	// STEP1: ovnkube-node start's up in remote zone and sets the "k8s.ovn.org/zone-name" above.
-	// STEP2: We delay the flip of connection for ovnkube-node(ovn-controller) to the new remote SBDB
-	//        until the new remote ovnkube-controller has finished programming all the K8s core objects
-	//        like routes, services and pods. Until then the ovnkube-node will talk to legacy SBDB.
-	// STEP3: Once we get the signal that the new SBDB is ready, we set the "k8s.ovn.org/remote-zone-migrated" annotation
-	// STEP4: We call setDBAuth to now point to new SBDB
-	// STEP5: Legacy ovnkube-master sees "k8s.ovn.org/remote-zone-migrated" annotation on this node and now knows that
-	//        this node has remote-zone-migrated successfully and tears down old setup and creates new IC resource
-	//        plumbing (takes 80ms based on what we saw in CI runs so we might still have that small window of disruption).
-	// NOTE: ovnkube-node in DPU host mode doesn't go through upgrades for OVN-IC and has no SBDB to connect to. Thus this part shall be skipped.
-	var syncNodes, syncServices, syncPods bool
-	if config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.OVNKubernetesFeature.EnableInterconnect && nc.sbZone != types.OvnDefaultZone && !util.HasNodeMigratedZone(node) {
-		klog.Info("Upgrade Hack: Interconnect is enabled")
-		var err1 error
-		start := time.Now()
-		err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 300*time.Second, true, func(_ context.Context) (bool, error) {
-			// we loop through all the nodes in the cluster and ensure ovnkube-controller has finished creating the LRSR required for pod2pod overlay communication
-			if !syncNodes {
-				nodes, err := nc.watchFactory.GetNodes()
-				if err != nil {
-					err1 = fmt.Errorf("upgrade hack: error retrieving node %s: %v", nc.name, err)
-					return false, nil
-				}
-				for _, node := range nodes {
-					node := *node
-					if nc.name != node.Name && util.GetNodeZone(&node) != config.Default.Zone && !util.NoHostSubnet(&node) {
-						nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(&node, types.DefaultNetworkName)
-						if err != nil {
-							if util.IsAnnotationNotSetError(err) {
-								klog.Infof("Skipping node %q. k8s.ovn.org/node-subnets annotation was not found", node.Name)
-								continue
-							}
-							err1 = fmt.Errorf("unable to fetch node-subnet annotation for node %s: err, %v", node.Name, err)
-							return false, nil
-						}
-						for _, nodeSubnet := range nodeSubnets {
-							klog.Infof("Upgrade Hack: node %s, subnet %s", node.Name, nodeSubnet)
-							if !checkOVNSBNodeLRSR(nodeSubnet) {
-								err1 = fmt.Errorf("upgrade hack: unable to find LRSR for node %s", node.Name)
-								return false, nil
-							}
-						}
-					}
-				}
-				klog.Infof("Upgrade Hack: Syncing nodes took %v", time.Since(start))
-				syncNodes = true
-			}
-			// we loop through all existing services in the cluster and ensure ovnkube-controller has finished creating LoadBalancers required for services to work
-			if !syncServices {
-				services, err := nc.watchFactory.GetServices()
-				if err != nil {
-					err1 = fmt.Errorf("upgrade hack: error retrieving the services %v", err)
-					return false, nil
-				}
-				lbNames := fetchLBNames()
-				for _, s := range services {
-					// don't process headless service
-					if !util.ServiceTypeHasClusterIP(s) || !util.IsClusterIPSet(s) {
-						continue
-					}
-					if !lbExists(lbNames, s.Namespace, s.Name) {
-						return false, nil
-					}
-				}
-				klog.Infof("Upgrade Hack: Syncing services took %v", time.Since(start))
-				syncServices = true
-			}
-			if !syncPods {
-				pods, err := nc.watchFactory.GetAllPods()
-				if err != nil {
-					err1 = fmt.Errorf("upgrade hack: error retrieving the services %v", err)
-					return false, nil
-				}
-				for _, p := range pods {
-					if !util.PodScheduled(p) || util.PodCompleted(p) || util.PodWantsHostNetwork(p) {
-						continue
-					}
-					if p.Spec.NodeName != nc.name {
-						// remote pod
-						continue
-					}
-					if !portExists(p.Namespace, p.Name) {
-						return false, nil
-					}
-				}
-				klog.Infof("Upgrade Hack: Syncing pods took %v", time.Since(start))
-				syncPods = true
-			}
-			return true, nil
-		})
-		if err != nil {
-			return fmt.Errorf("upgrade hack: failed while waiting for the remote ovnkube-controller to be ready: %v, %v", err, err1)
-		}
-		if err := util.SetNodeZoneMigrated(nodeAnnotator, nc.sbZone); err != nil {
-			return fmt.Errorf("upgrade hack: failed to set node zone annotation for node %s: %w", nc.name, err)
-		}
-		if err := nodeAnnotator.Run(); err != nil {
-			return fmt.Errorf("upgrade hack: failed to set node %s annotations: %w", nc.name, err)
-		}
-		klog.Infof("ovnkube-node %s finished annotating node with remote-zone-migrated; took: %v", nc.name, time.Since(start))
-		for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} {
-			if err := auth.SetDBAuth(); err != nil {
-				return fmt.Errorf("upgrade hack: Unable to set the authentication towards OVN local dbs")
-			}
-		}
-		klog.Infof("Upgrade hack: ovnkube-node %s finished setting DB Auth; took: %v", nc.name, time.Since(start))
-	}
-	/** HACK END **/
-
 	// Wait for management port and gateway resources to be created by the master
 	klog.Infof("Waiting for gateway and management port readiness...")
 	start := time.Now()
@@ -1151,11 +969,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 			return err
 		}
 		nc.wg.Add(1)
-		go func() {
+		go func(stopCh <-chan struct{}) {
 			defer nc.wg.Done()
-			nodeController.Run(nc.stopChan)
-		}()
-	} else {
+			nodeController.Run(stopCh)
+		}(nc.stopChan)
+	} else if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
 		// attempt to cleanup the possibly stale bridge
 		_, stderr, err := util.RunOVSVsctl("--if-exists", "del-br", "br-ext")
 		if err != nil {
@@ -1262,7 +1080,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 	nc.linkManager.Run(nc.stopChan, nc.wg)

 	nc.wg.Add(1)
-	go func() {
+	go func(stopCh <-chan struct{}) {
 		defer nc.wg.Done()
 		podResClient, err := podresourcesapi.New()
 		if err != nil {
@@ -1274,8 +1092,8 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
 				klog.V(4).Infof("Error closing PodResourcesAPI client: %v", err)
 			}
 		}()
-		ovspinning.Run(ctx, nc.stopChan, podResClient)
-	}()
+		ovspinning.Run(ctx, stopCh, podResClient)
+	}(nc.stopChan)

 	klog.Infof("Default node network controller initialized and ready.")
 	return nil
@@ -1317,10 +1135,10 @@ func (nc *DefaultNodeNetworkController) startEgressIPHealthCheckingServer(mgmtPo
 	}

 	nc.wg.Add(1)
-	go func() {
+	go func(stopCh <-chan struct{}) {
 		defer nc.wg.Done()
-		healthServer.Run(nc.stopChan)
-	}()
+		healthServer.Run(stopCh)
+	}(nc.stopChan)

 	return nil
 }
@@ -1366,12 +1184,23 @@ func (nc *DefaultNodeNetworkController) reconcileConntrackUponEndpointSliceEvent
 				klog.Errorf("Failed to get service port for endpoint %s: %v", oldIPStr, err)
 				continue
 			}
-			// upon update and delete events, flush conntrack only for UDP
+			// upon update and delete events, flush UDP conntrack for the Service port
 			if _, err := util.DeleteConntrackServicePort(oldIPStr, servicePort.Port, *oldPort.Protocol,
 				netlink.ConntrackReplyAnyIP, nil); err != nil {
 				klog.Errorf("Failed to delete conntrack entry for %s port %d: %v", oldIPStr, servicePort.Port, err)
 				errors = append(errors, err)
 			}
+
+			// Flush UDP conntrack entries for NodePort (and LoadBalancer services that allocate NodePorts).
+			// TODO: Once vishvananda/netlink supports ConntrackFilterType '--reply-port-src', we can use a single
+			// DeleteConntrackServicePort() call to flush conntrack entries for both ClusterIP and NodePort.
+			if util.ServiceTypeHasNodePort(svc) && servicePort.NodePort > 0 {
+				if _, err := util.DeleteConntrackServicePort(oldIPStr, servicePort.NodePort, *oldPort.Protocol,
+					netlink.ConntrackReplyAnyIP, nil); err != nil {
+					klog.Errorf("Failed to delete conntrack entry for %s NodePort %d: %v", oldIPStr, servicePort.NodePort, err)
+					errors = append(errors, err)
+				}
+			}
 		}
 	}
 }
@@ -1595,9 +1424,23 @@ func (nc *DefaultNodeNetworkController) syncNodes(objs []interface{}) error {
 }
If the MTU is not big -// enough, it will return an error +// enough to carry the `config.Default.MTU` and the Geneve header (if overlay transport is used). +// If the MTU is not big enough, it will return an error func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { + // calc required MTU + var requiredMTU int + if config.Gateway.SingleNode || config.Default.Transport == types.NetworkTransportNoOverlay { + requiredMTU = config.Default.MTU + } else { + if config.IPv4Mode && !config.IPv6Mode { + // we run in single-stack IPv4 only + requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv4 + } else { + // we run in single-stack IPv6 or dual-stack mode + requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv6 + } + } + // OVN allows `external_ids:ovn-encap-ip` to be a list of IPs separated by comma ovnEncapIps := strings.Split(config.Default.EffectiveEncapIP, ",") for _, ip := range ovnEncapIps { @@ -1610,20 +1453,6 @@ func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { return fmt.Errorf("could not get MTU for the interface with address %s: %w", ovnEncapIP, err) } - // calc required MTU - var requiredMTU int - if config.Gateway.SingleNode { - requiredMTU = config.Default.MTU - } else { - if config.IPv4Mode && !config.IPv6Mode { - // we run in single-stack IPv4 only - requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv4 - } else { - // we run in single-stack IPv6 or dual-stack mode - requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv6 - } - } - if mtu < requiredMTU { return fmt.Errorf("MTU (%d) of network interface %s is too small for specified overlay MTU (%d)", mtu, interfaceName, requiredMTU) diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index ccfa18af0e..76891520c8 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -1769,7 +1769,6 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } ip string port uint16 protocol uint8 - family netlink.InetFamily } // Test data structure for table-driven tests @@ -1782,12 +1781,21 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } expectedFilters []expectedConntrackFilter } - // Helper to create EndpointSlice - makeEndpointSlice := func(portConfigs []struct { + type endpointPortConfig struct { name *string port int32 protocol corev1.Protocol - }, addresses []string) *discovery.EndpointSlice { + } + + type servicePortConfig struct { + name string + port int32 + targetPort int32 + protocol corev1.Protocol + } + + // Helper to create EndpointSlice + makeEndpointSlice := func(portConfigs []endpointPortConfig, addresses []string) *discovery.EndpointSlice { ports := make([]discovery.EndpointPort, len(portConfigs)) for i, pc := range portConfigs { p := pc.port @@ -1815,12 +1823,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } } // Helper to create Service - makeService := func(portConfigs []struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }) *corev1.Service { + makeService := func(portConfigs []servicePortConfig) *corev1.Service { ports := make([]corev1.ServicePort, len(portConfigs)) for i, pc := range portConfigs { ports[i] = corev1.ServicePort{ @@ -1842,6 +1845,16 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } } } + // Helper to create NodePort or LoadBalancer Service 
by invoking makeService + makeServiceWithNodePort := func(portConfigs []servicePortConfig, nodePorts []int32, svcType corev1.ServiceType) *corev1.Service { + svc := makeService(portConfigs) + svc.Spec.Type = svcType + for i := 0; i < len(nodePorts) && i < len(svc.Spec.Ports); i++ { + svc.Spec.Ports[i].NodePort = nodePorts[i] + } + return svc + } + // Helper function to build expected ConntrackFilter for verification buildExpectedFilter := func(ef expectedConntrackFilter) *netlink.ConntrackFilter { filter := &netlink.ConntrackFilter{} @@ -1942,13 +1955,8 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("old endpointslice is nil", reconcileConntrackTestCase{ - desc: "should not delete any conntrack entries when old endpoint is nil", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should not delete any conntrack entries when old endpoint is nil", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: nil, newEndpointSlice: &discovery.EndpointSlice{}, expectedConntrackCalls: 0, @@ -1957,69 +1965,42 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("service exists with matching unnamed port", reconcileConntrackTestCase{ - desc: "should delete conntrack with service port for unnamed port", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack with service port for unnamed port", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 1, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("service exists with matching named port", reconcileConntrackTestCase{ - desc: "should delete conntrack with service port for named port", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack with service port for named port", + service: makeService([]servicePortConfig{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 1, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: 
netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("service exists but port name mismatch", reconcileConntrackTestCase{ - desc: "should skip conntrack deletion when port name doesn't match", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should skip conntrack deletion when port name doesn't match", + service: makeService([]servicePortConfig{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: strPtr("grpc"), port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: strPtr("grpc"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2032,11 +2013,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } desc: "should return early without deleting conntrack when service not found", service: nil, oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2046,19 +2023,10 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("TCP protocol should be skipped", reconcileConntrackTestCase{ - desc: "should skip conntrack deletion for TCP protocol", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: tcpProtocol}}), + desc: "should skip conntrack deletion for TCP protocol", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: tcpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: tcpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: tcpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2068,78 +2036,51 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("multiple endpoints", reconcileConntrackTestCase{ - desc: "should delete conntrack for each endpoint", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack for each endpoint", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"}, ), newEndpointSlice: nil, expectedConntrackCalls: 3, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, - {ip: "10.0.0.2", port: 
 		Entry("IPv6 endpoint",
 			reconcileConntrackTestCase{
-				desc: "should delete conntrack for IPv6 endpoint",
-				service: makeService([]struct {
-					name       string
-					port       int32
-					targetPort int32
-					protocol   corev1.Protocol
-				}{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}),
+				desc:    "should delete conntrack for IPv6 endpoint",
+				service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}),
 				oldEndpointSlice: makeEndpointSlice(
-					[]struct {
-						name     *string
-						port     int32
-						protocol corev1.Protocol
-					}{{name: nil, port: testEndpointPort1, protocol: udpProtocol}},
+					[]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}},
 					[]string{"fd00::1"},
 				),
 				newEndpointSlice:       nil,
 				expectedConntrackCalls: 1,
 				expectedFilters: []expectedConntrackFilter{
-					{ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V6},
+					{ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
 				},
 			},
 		),

 		Entry("dual-stack endpoints",
 			reconcileConntrackTestCase{
-				desc: "should delete conntrack for both IPv4 and IPv6",
-				service: makeService([]struct {
-					name       string
-					port       int32
-					targetPort int32
-					protocol   corev1.Protocol
-				}{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}),
+				desc:    "should delete conntrack for both IPv4 and IPv6",
+				service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}),
 				oldEndpointSlice: makeEndpointSlice(
-					[]struct {
-						name     *string
-						port     int32
-						protocol corev1.Protocol
-					}{{name: nil, port: testEndpointPort1, protocol: udpProtocol}},
+					[]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}},
 					[]string{"10.0.0.1", "fd00::1"},
 				),
 				newEndpointSlice:       nil,
 				expectedConntrackCalls: 2,
 				expectedFilters: []expectedConntrackFilter{
-					{ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4},
-					{ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V6},
+					{ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+					{ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
 				},
 			},
 		),
@@ -2147,21 +2088,12 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 }
 		Entry("multiple service ports with matching names",
 			reconcileConntrackTestCase{
 				desc: "should match correct service port by name for multiple ports",
-				service: makeService([]struct {
-					name       string
-					port       int32
-					targetPort int32
-					protocol   corev1.Protocol
-				}{
+				service: makeService([]servicePortConfig{
 					{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol},
 					{name: "https", port: testServicePort2, targetPort: testEndpointPort2, protocol: udpProtocol},
 				}),
 				oldEndpointSlice: makeEndpointSlice(
-					[]struct {
-						name     *string
-						port     int32
-						protocol corev1.Protocol
-					}{
+					[]endpointPortConfig{
 						{name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol},
 						{name: strPtr("https"), port: testEndpointPort2, protocol: udpProtocol},
 					},
@@ -2170,11 +2102,94 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 }
 				newEndpointSlice:       nil,
 				expectedConntrackCalls: 2,
 				expectedFilters: []expectedConntrackFilter{
-					{ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4},
-					{ip: "10.0.0.1", port: uint16(testServicePort2), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4},
+					{ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+					{ip: "10.0.0.1", port: uint16(testServicePort2), protocol: syscall.IPPROTO_UDP},
 				},
 			},
 		),
+		Entry("NodePort service", reconcileConntrackTestCase{
+			desc: "should delete conntrack entries for both service port and NodePort",
+			service: makeServiceWithNodePort([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}},
+				[]int32{30000}, corev1.ServiceTypeNodePort),
+			oldEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}),
+			newEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}),
+			expectedConntrackCalls: 2,
+			expectedFilters: []expectedConntrackFilter{
+				{ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+				{ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP},
+			},
+		}),
+		Entry("NodePort service with mixed protocols should only clean UDP NodePort", reconcileConntrackTestCase{
+			desc: "should only delete conntrack for UDP NodePort, not TCP (protocol filtering)",
+			service: makeServiceWithNodePort([]servicePortConfig{
+				{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol},
+				{name: "", port: testServicePort2, targetPort: testEndpointPort1, protocol: tcpProtocol},
+			}, []int32{30000, 30001}, corev1.ServiceTypeNodePort),
+			oldEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}),
+			newEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}),
+			expectedConntrackCalls: 2, // Only UDP: service port + NodePort (TCP port 30001 should be skipped)
+			expectedFilters: []expectedConntrackFilter{
+				{ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+				{ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP},
+			},
+		}),
+		Entry("NodePort service with multiple UDP ports", reconcileConntrackTestCase{
+			desc: "should delete conntrack entries only for the specific NodePort that changed",
+			service: makeServiceWithNodePort([]servicePortConfig{
+				{name: "dns", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol},
+				{name: "snmp", port: testServicePort2, targetPort: testEndpointPort1, protocol: udpProtocol},
+			}, []int32{30000, 30002}, corev1.ServiceTypeNodePort),
+			oldEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: strPtr("dns"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}),
+			newEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: strPtr("dns"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}),
+			expectedConntrackCalls: 2, // service port + NodePort for "dns" only
+			expectedFilters: []expectedConntrackFilter{
+				{ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+				{ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP},
+			},
+		}),
+		Entry("LoadBalancer service with NodePort allocation", reconcileConntrackTestCase{
+			desc: "should delete conntrack entries for both service port and NodePort",
+			service: func() *corev1.Service {
+				svc := makeServiceWithNodePort([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}},
+					[]int32{30000}, corev1.ServiceTypeLoadBalancer)
+				svc.Status = corev1.ServiceStatus{
+					LoadBalancer: corev1.LoadBalancerStatus{
+						Ingress: []corev1.LoadBalancerIngress{{IP: "5.5.5.5"}},
+					},
+				}
+				return svc
+			}(),
+			oldEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}),
+			newEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}),
+			expectedConntrackCalls: 2,
+			expectedFilters: []expectedConntrackFilter{
+				{ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+				{ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP},
+			},
+		}),
+		Entry("LoadBalancer service with AllocateLoadBalancerNodePorts=false", func() reconcileConntrackTestCase {
+			allocateNodePorts := false
+			return reconcileConntrackTestCase{
+				desc: "should only delete conntrack entries for service port (no NodePort)",
+				service: func() *corev1.Service {
+					svc := makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}})
+					svc.Spec.Type = corev1.ServiceTypeLoadBalancer
+					svc.Spec.AllocateLoadBalancerNodePorts = &allocateNodePorts
+					svc.Status = corev1.ServiceStatus{
+						LoadBalancer: corev1.LoadBalancerStatus{
+							Ingress: []corev1.LoadBalancerIngress{{IP: "5.5.5.5"}},
+						},
+					}
+					return svc
+				}(),
+				oldEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}),
+				newEndpointSlice:       makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}),
+				expectedConntrackCalls: 1,
+				expectedFilters: []expectedConntrackFilter{
+					{ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP},
+				},
+			}
+		}()),
 	)
 })
 })
diff --git a/go-controller/pkg/node/egressip/gateway_egressip.go b/go-controller/pkg/node/egressip/gateway_egressip.go
index 83657404b8..cccc79642b 100644
--- a/go-controller/pkg/node/egressip/gateway_egressip.go
+++ b/go-controller/pkg/node/egressip/gateway_egressip.go
@@ -175,6 +175,7 @@ type BridgeEIPAddrManager struct {
 	nodeName         string
 	bridgeName       string
 	nodeAnnotationMu sync.Mutex
+	annotationIPs    sets.Set[string]
 	eIPLister        egressiplisters.EgressIPLister
 	eIPInformer      cache.SharedIndexInformer
 	nodeLister       corev1listers.NodeLister
@@ -195,6 +196,7 @@ func NewBridgeEIPAddrManager(nodeName, bridgeName string, linkManager *linkmanag
 		nodeName:         nodeName,         // k8 node name
 		bridgeName:       bridgeName,       // bridge name for which EIP IPs are managed
 		nodeAnnotationMu: sync.Mutex{},     // mu for updating Node annotation
+		annotationIPs:    sets.New[string](),
 		eIPLister:        eIPInformer.Lister(),
 		eIPInformer:      eIPInformer.Informer(),
 		nodeLister:       nodeInformer.Lister(),
@@ -305,6 +307,9 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error {
 	if err != nil {
 		return fmt.Errorf("failed to sync EgressIP gateway config because unable to get Node annotation: %v", err)
 	}
+	g.nodeAnnotationMu.Lock()
+	g.annotationIPs = sets.New[string](getIPsStr(annotIPs...)...)
+	g.nodeAnnotationMu.Unlock()
 	configs := markIPs{v4: map[int]string{}, v6: map[int]string{}}
 	for _, obj := range objs {
 		eip, ok := obj.(*egressipv1.EgressIP)
@@ -349,72 +354,60 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error {
 	return nil
 }

-// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller
-// may repeat addition of addresses without care for duplicate addresses being added.
-func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error {
-	g.nodeAnnotationMu.Lock()
-	defer g.nodeAnnotationMu.Unlock()
+// updateAnnotationLocked updates the node's bridge egress IPs annotation.
+// Must be called with nodeAnnotationMu held.
+func (g *BridgeEIPAddrManager) updateAnnotationLocked(updatedIPs sets.Set[string]) error {
 	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		node, err := g.nodeLister.Get(g.nodeName)
 		if err != nil {
 			return err
 		}
-		existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node)
-		if err != nil {
-			if util.IsAnnotationNotSetError(err) {
-				existingIPsStr = make([]string, 0)
-			} else {
-				return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err)
-			}
-		}
-		existingIPsSet := sets.New[string](existingIPsStr...)
-		candidateIPStr := candidateIP.String()
-		if existingIPsSet.Has(candidateIPStr) {
-			return nil
-		}
-		patch, err := json.Marshal(existingIPsSet.Insert(candidateIPStr).UnsortedList())
+		patch, err := json.Marshal(updatedIPs.UnsortedList())
 		if err != nil {
 			return err
 		}
-		node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch)
-		return g.kube.UpdateNodeStatus(node)
+		nodeToUpdate := node.DeepCopy()
+		if nodeToUpdate.Annotations == nil {
+			nodeToUpdate.Annotations = map[string]string{}
+		}
+		nodeToUpdate.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch)
+		return g.kube.UpdateNodeStatus(nodeToUpdate)
 	})
 }

+// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller
+// may repeat addition of addresses without care for duplicate addresses being added.
+func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error {
+	g.nodeAnnotationMu.Lock()
+	defer g.nodeAnnotationMu.Unlock()
+	updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...)
+	updatedIPs.Insert(candidateIP.String())
+	if updatedIPs.Equal(g.annotationIPs) {
+		return nil
+	}
+	if err := g.updateAnnotationLocked(updatedIPs); err != nil {
+		return err
+	}
+	g.annotationIPs = updatedIPs
+	return nil
+}
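addIPToAnnotation and deleteIPsFromAnnotation now follow a cache-through-write pattern: compute the updated set against the in-memory copy, skip no-op writes, push through RetryOnConflict, and only commit the cache once the API write succeeded. A condensed sketch of that pattern (writeAnnotation stands in for the lister + UpdateNodeStatus plumbing in the real code):

```go
package example

import (
	"sync"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/util/retry"
)

// annotationCache sketches the pattern used by BridgeEIPAddrManager above:
// an in-memory copy of the annotation guarded by a mutex, written through
// retry.RetryOnConflict (which retries only on conflict errors), and only
// committed locally once the API write succeeds.
type annotationCache struct {
	mu  sync.Mutex
	ips sets.Set[string]

	// writeAnnotation is a placeholder for refetching the node from the
	// lister, deep-copying it, and calling UpdateNodeStatus.
	writeAnnotation func(ips []string) error
}

func (c *annotationCache) add(ip string) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	updated := c.ips.Clone()
	updated.Insert(ip)
	if updated.Equal(c.ips) {
		return nil // no-op changes never hit the API server
	}
	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
		return c.writeAnnotation(updated.UnsortedList())
	})
	if err != nil {
		return err // keep the old cache; the annotation was not updated
	}
	c.ips = updated
	return nil
}
```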
func (g *BridgeEIPAddrManager) deleteIPsFromAnnotation(candidateIPs ...net.IP) error { g.nodeAnnotationMu.Lock() defer g.nodeAnnotationMu.Unlock() - return retry.RetryOnConflict(retry.DefaultRetry, func() error { - node, err := g.nodeLister.Get(g.nodeName) - if err != nil { - return err - } - existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node) - if err != nil { - if util.IsAnnotationNotSetError(err) { - existingIPsStr = make([]string, 0) - } else { - return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err) - } - } - if len(existingIPsStr) == 0 { - return nil - } - existingIPsSet := sets.New[string](existingIPsStr...) - candidateIPsStr := getIPsStr(candidateIPs...) - if !existingIPsSet.HasAny(candidateIPsStr...) { - return nil - } - existingIPsSet.Delete(candidateIPsStr...) - patch, err := json.Marshal(existingIPsSet.UnsortedList()) - if err != nil { - return err - } - node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(node) - }) + candidateIPsStr := getIPsStr(candidateIPs...) + updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...) + updatedIPs.Delete(candidateIPsStr...) + if updatedIPs.Equal(g.annotationIPs) { + return nil + } + if err := g.updateAnnotationLocked(updatedIPs); err != nil { + return err + } + g.annotationIPs = updatedIPs + return nil } func (g *BridgeEIPAddrManager) addIPBridge(ip net.IP) error { diff --git a/go-controller/pkg/node/egressip/gateway_egressip_test.go b/go-controller/pkg/node/egressip/gateway_egressip_test.go index 6493cb968a..816219df0f 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip_test.go +++ b/go-controller/pkg/node/egressip/gateway_egressip_test.go @@ -12,6 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -72,9 +73,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -122,9 +125,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + 
return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -164,9 +169,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.UpdateEgressIP(unassignedEIP, assignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -189,9 +196,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.UpdateEgressIP(assignedEIP, unassignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -250,9 +259,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.DeleteEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -290,9 +301,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { eipUnassigned3 := getEIPNotAssignedToNode(mark3, ipV4Addr3) err := addrMgr.SyncEgressIP([]interface{}{eipAssigned1, eipAssigned2, eipUnassigned3}) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process valid EgressIPs") - node, err := addrMgr.nodeLister.Get(nodeName) - 
gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -374,9 +387,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { // Verify cleanup: secondary IP removed from cache, annotation, and bridge gomega.Expect(addrMgr.cache.IsIPPresent(net.ParseIP(secondaryIP))).Should(gomega.BeFalse(), "secondary IP should be removed from cache") - node, err = addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(secondaryIP), bridgeLinkIndex))).Should(gomega.BeTrue(), "should delete secondary IP from bridge") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -411,8 +426,17 @@ func initBridgeEIPAddrManagerWithHostCIDRs(nodeName, bridgeName string, bridgeEI gomega.Expect(watchFactory.Start()).Should(gomega.Succeed(), "watch factory should start") gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "watch factory creation must succeed") linkManager := linkmanager.NewController(nodeName, true, true, nil) - return NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()), - watchFactory.Shutdown + addrMgr := NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()) + initialAnnotIPs, err := util.ParseNodeBridgeEgressIPsAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + initialAnnotIPs = make([]string, 0) + } else { + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "bridge EgressIP annotation should be parseable") + } + } + addrMgr.annotationIPs = sets.New[string](initialAnnotIPs...) 
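+	// Seeding annotationIPs here mirrors what SyncEgressIP now does at startup: the
+	// tests construct the manager directly, so without this the in-memory cache would
+	// start empty and deletes of pre-existing annotation IPs would be short-circuited
+	// by the updatedIPs.Equal(g.annotationIPs) check in deleteIPsFromAnnotation.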
+ return addrMgr, watchFactory.Shutdown } func getEIPAssignedToNode(nodeName, mark, assignedIP string) *egressipv1.EgressIP { diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index 6625b04e8c..f0eb9094d6 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -75,7 +75,7 @@ func getGatewayNextHops() ([]net.IP, string, error) { } } gatewayIntf := config.Gateway.Interface - if gatewayIntf != "" { + if gatewayIntf != "" && config.OvnKubeNode.Mode != types.NodeModeDPUHost { if bridgeName, _, err := util.RunOVSVsctl("port-to-br", gatewayIntf); err == nil { // This is an OVS bridge's internal port gatewayIntf = bridgeName diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 89b858d09c..475bd328a8 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -204,7 +204,7 @@ func newEndpointSlice(svcName, namespace string, endpoints []discovery.Endpoint, } } -func makeConntrackFilter(ip string, port int, protocol corev1.Protocol) *netlink.ConntrackFilter { +func makeConntrackFilter(ip string, port int, protocol corev1.Protocol, filterType netlink.ConntrackFilterType) *netlink.ConntrackFilter { filter := &netlink.ConntrackFilter{} var err error @@ -223,15 +223,17 @@ func makeConntrackFilter(ip string, port int, protocol corev1.Protocol) *netlink } ipAddress := net.ParseIP(ip) Expect(ipAddress).NotTo(BeNil()) - err = filter.AddIP(netlink.ConntrackOrigDstIP, ipAddress) + err = filter.AddIP(filterType, ipAddress) Expect(err).NotTo(HaveOccurred()) return filter } type ctFilterDesc struct { - ip string - port int + ip string + port int + protocol corev1.Protocol + filterType netlink.ConntrackFilterType } func addConntrackMocks(nlMock *mocks.NetLinkOps, filterDescs []ctFilterDesc) { @@ -242,7 +244,7 @@ func addConntrackMocks(nlMock *mocks.NetLinkOps, filterDescs []ctFilterDesc) { OnCallMethodArgs: []interface{}{ netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(ctf.ip, ctf.port, corev1.ProtocolTCP), + makeConntrackFilter(ctf.ip, ctf.port, ctf.protocol, ctf.filterType), }, RetArgList: []interface{}{uint(1), nil}, }) @@ -1789,7 +1791,7 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = wf Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"1.1.1.1", 8032}, {"10.129.0.2", 8032}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"1.1.1.1", 8032, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"10.129.0.2", 8032, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) Eventually(func() bool { @@ -1878,7 +1880,7 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = wf Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 0}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 0, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) 
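+	// For reference, a conntrack filter equivalent to one ctFilterDesc entry above can
+	// be built directly with the vishvananda/netlink API (a sketch, not the exact body
+	// of makeConntrackFilter; values taken from this test):
+	//
+	//	f := &netlink.ConntrackFilter{}
+	//	_ = f.AddProtocol(syscall.IPPROTO_TCP)                        // corev1.ProtocolTCP
+	//	_ = f.AddPort(netlink.ConntrackOrigDstPort, 31111)            // the NodePort
+	//	_ = f.AddIP(netlink.ConntrackOrigDstIP, net.ParseIP("192.168.18.15"))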
Eventually(fExec.CalledMatchesExpected, "2s").Should(BeTrue(), fExec.ErrorDesc) @@ -1981,7 +1983,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -1991,7 +1993,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP)). + makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2057,7 +2059,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2067,7 +2069,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP)). + makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2182,7 +2184,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(externalIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(externalIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2253,7 +2255,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(lbIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(lbIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2322,7 +2324,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, 80, corev1.ProtocolUDP)). + makeConntrackFilter(service.Spec.ClusterIP, 80, corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). 
Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2427,7 +2429,7 @@ var _ = Describe("Node Operations", func() { return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) }).Should(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.10.10.1", 8034}, {"10.129.0.2", 8034}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.10.10.1", 8034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"10.129.0.2", 8034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -2555,11 +2557,11 @@ var _ = Describe("Node Operations", func() { }).Should(Equal(expectedLBExternalIPFlows2)) addConntrackMocks(netlinkMock, []ctFilterDesc{ - {"1.1.1.1", 8080}, - {"1.1.1.2", 8080}, - {"5.5.5.5", 8080}, - {"192.168.18.15", 31111}, - {"10.129.0.2", 8080}, + {"1.1.1.1", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"1.1.1.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"5.5.5.5", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, }) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( @@ -2774,7 +2776,7 @@ var _ = Describe("Node Operations", func() { return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) }).Should(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 38034}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 38034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -2911,7 +2913,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3057,7 +3059,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3207,7 +3209,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, 
{"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3352,7 +3354,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3500,7 +3502,7 @@ var _ = Describe("Node Operations", func() { Expect(fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111")).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index de5d1ee235..29d6103c0a 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -827,6 +827,9 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi } nftElems := getGatewayNFTRules(service, localEndpoints, true) nftElems = append(nftElems, getGatewayNFTRules(service, localEndpoints, false)...) + if util.IsNetworkSegmentationSupportEnabled() { + nftElems = append(nftElems, getUDNNFTRules(service, nil)...) + } if len(nftElems) > 0 { if err := nodenft.DeleteNFTElements(nftElems); err != nil { err = fmt.Errorf("failed to delete nftables rules for service %s/%s: %v", @@ -834,33 +837,6 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi errors = append(errors, err) } } - - if util.IsNetworkSegmentationSupportEnabled() { - // NOTE: The code below is not using nodenft.DeleteNFTElements because it first adds elements - // before removing them, which fails for UDN NFT rules. These rules only have map keys, - // not key-value pairs, making it impossible to add. - // Attempt to delete the elements directly and handle the IsNotFound error. - // - // TODO: Switch to `nft destroy` when supported. - nftElems = getUDNNFTRules(service, nil) - if len(nftElems) > 0 { - nft, err := nodenft.GetNFTablesHelper() - if err != nil { - return utilerrors.Join(append(errors, err)...) - } - - tx := nft.NewTransaction() - for _, elem := range nftElems { - tx.Delete(elem) - } - - if err := nft.Run(context.TODO(), tx); err != nil && !knftables.IsNotFound(err) { - err = fmt.Errorf("failed to delete nftables rules for UDN service %s/%s: %v", - service.Namespace, service.Name, err) - errors = append(errors, err) - } - } - } } return utilerrors.Join(errors...) 
@@ -889,15 +865,16 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { } klog.V(5).Infof("Adding service %s in namespace %s", service.Name, service.Namespace) - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } + name := ktypes.NamespacedName{Namespace: service.Namespace, Name: service.Name} epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) if err != nil { @@ -977,11 +954,12 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) + } if err = addServiceRules(new, netInfo, svcConfig.localEndpoints, svcConfig.hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1219,14 +1197,20 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { } netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted. - if util.IsInvalidPrimaryNetworkError(err) { - continue - } if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. + if util.IsInvalidPrimaryNetworkError(err) { + continue + } errors = append(errors, err) continue } + if netInfo == nil { + // network not active on our node + continue + } epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) if err != nil { @@ -1307,6 +1291,10 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", epSlice.Name, epSlice.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(epSlice, netInfo) { return nil @@ -1425,21 +1413,19 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice // and allows graceful handling of deletion race conditions. netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(namespacedName.Namespace) if err != nil { - // If the namespace was deleted, skip adding new service rules - if apierrors.IsNotFound(err) { - klog.V(5).Infof("Namespace not found for service %s/%s during endpoint slice delete, skipping adding service rules", - namespacedName.Namespace, namespacedName.Name) - return utilerrors.Join(errors...) 
- } - // If the UDN was deleted, skip adding new service rules + // If the UDN was deleted or not processed yet, skip adding new service rules if util.IsInvalidPrimaryNetworkError(err) { - klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network invalid: %v", + klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network unavailable: %v", namespacedName.Namespace, namespacedName.Name, err) return utilerrors.Join(errors...) } errors = append(errors, fmt.Errorf("error getting active network for service %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err)) return utilerrors.Join(errors...) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) + } if err = addServiceRules(svcConfig.service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1480,6 +1466,10 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", newEpSlice.Name, newEpSlice.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(newEpSlice, netInfo) { return nil @@ -1566,11 +1556,12 @@ func (npwipt *nodePortWatcherIptables) AddService(service *corev1.Service) error netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if err := addServiceRules(service, netInfo, nil, false, nil); err != nil { return fmt.Errorf("AddService failed for nodePortWatcherIptables: %v", err) @@ -1597,11 +1588,12 @@ func (npwipt *nodePortWatcherIptables) UpdateService(old, new *corev1.Service) e if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) + } if err = addServiceRules(new, netInfo, nil, false, nil); err != nil { errors = append(errors, err) @@ -1642,6 +1634,21 @@ func (npwipt *nodePortWatcherIptables) SyncServices(services []interface{}) erro if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { continue } + netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.GetNamespace()) + if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. + if util.IsInvalidPrimaryNetworkError(err) { + continue + } + errors = append(errors, err) + continue + } + if netInfo == nil { + // network not on our node + continue + } // Add correct iptables rules. // TODO: ETP and ITP is not implemented for smart NIC mode. 
keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, nil, false)...) diff --git a/go-controller/pkg/node/gateway_shared_intf_test.go b/go-controller/pkg/node/gateway_shared_intf_test.go index 065b7c52ad..43078f027a 100644 --- a/go-controller/pkg/node/gateway_shared_intf_test.go +++ b/go-controller/pkg/node/gateway_shared_intf_test.go @@ -10,8 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/fake" @@ -21,6 +20,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -38,9 +39,14 @@ type mockNetworkManagerWithNamespaceNotFoundError struct { networkmanager.Interface } -func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { - notFoundErr := apierrors.NewNotFound(schema.GroupResource{Resource: "namespaces"}, namespace) - return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, notFoundErr) +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetPrimaryNADForNamespace(_ string) (string, error) { + // Simulate namespace deletion: no primary NAD by definition. + return "", nil +} + +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // Namespace is gone; new GetActiveNetworkForNamespace semantics return nil, nil. + return nil, nil } // mockNetworkManagerWithInvalidPrimaryNetworkError simulates UDN deletion scenario @@ -48,6 +54,11 @@ type mockNetworkManagerWithInvalidPrimaryNetworkError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -57,10 +68,74 @@ type mockNetworkManagerWithError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, fmt.Errorf("network lookup failed for namespace %q", namespace) } +// mockNetworkManagerWithInvalidPrimaryNetworkSkip simulates a namespace that +// requires a primary UDN but is currently in invalid primary network state. 
+type mockNetworkManagerWithInvalidPrimaryNetworkSkip struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetPrimaryNADForNamespace(namespace string) (string, error) { + return "", util.NewInvalidPrimaryNetworkError(namespace) +} + +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { + return nil, util.NewInvalidPrimaryNetworkError(namespace) +} + +// mockNetworkManagerWithInactiveNode simulates a UDN where the node is inactive for the network. +type mockNetworkManagerWithInactiveNode struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithInactiveNode) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithInactiveNode) GetNetworkNameForNADKey(_ string) string { + return "test-udn" +} + +func (m *mockNetworkManagerWithInactiveNode) NodeHasNetwork(_, _ string) bool { + return false +} + +func (m *mockNetworkManagerWithInactiveNode) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // New code paths resolve activity directly via GetActiveNetworkForNamespace. + // Returning nil netInfo means "network not active on this node". + return nil, nil +} + +// mockNetworkManagerWithActiveUDN simulates a UDN active on this node. +type mockNetworkManagerWithActiveUDN struct { + networkmanager.Interface + netInfo util.NetInfo +} + +func (m *mockNetworkManagerWithActiveUDN) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithActiveUDN) GetNetworkNameForNADKey(_ string) string { + return m.netInfo.GetNetworkName() +} + +func (m *mockNetworkManagerWithActiveUDN) NodeHasNetwork(_, _ string) bool { + return true +} + +func (m *mockNetworkManagerWithActiveUDN) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + return m.netInfo, nil +} + // verifyIPTablesRule checks if an iptables rule exists and asserts the expected state func verifyIPTablesRule(ipt util.IPTablesHelper, serviceIP string, servicePort, nodePort int32, shouldExist bool, message string) { exists, err := ipt.Exists("nat", "OVN-KUBE-NODEPORT", @@ -256,3 +331,153 @@ var _ = Describe("DeleteEndpointSlice", func() { }) }) }) + +var _ = Describe("SyncServices", func() { + var ( + fakeClient *util.OVNNodeClientset + watcher *factory.WatchFactory + npw *nodePortWatcher + iptV4 util.IPTablesHelper + iptV6 util.IPTablesHelper + ) + + const ( + nodeName = "test-node" + testNamespace = "test-namespace" + testService = "test-service" + ) + + BeforeEach(func() { + var err error + Expect(config.PrepareTestConfig()).To(Succeed()) + config.Gateway.Mode = config.GatewayModeLocal + config.IPv4Mode = true + config.IPv6Mode = false + _ = nodenft.SetFakeNFTablesHelper() + + fakeClient = &util.OVNNodeClientset{ + KubeClient: fake.NewSimpleClientset(), + } + fakeClient.AdminPolicyRouteClient = adminpolicybasedrouteclient.NewSimpleClientset() + fakeClient.NetworkAttchDefClient = nadfake.NewSimpleClientset() + fakeClient.UserDefinedNetworkClient = udnfakeclient.NewSimpleClientset() + + watcher, err = factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + err = watcher.Start() + Expect(err).NotTo(HaveOccurred()) + + iptV4, iptV6 = util.SetFakeIPTablesHelpers() + npw = initFakeNodePortWatcher(iptV4, iptV6) + npw.watchFactory = watcher + npw.networkManager = networkmanager.Default().Interface() + + k := 
&kube.Kube{KClient: fakeClient.KubeClient} + npw.nodeIPManager = newAddressManagerInternal(nodeName, k, nil, watcher, nil, false) + }) + + AfterEach(func() { + watcher.Shutdown() + }) + + Context("when namespace has invalid primary network", func() { + It("should skip service sync without failing startup", func() { + service := newService(testService, testNamespace, "10.96.0.20", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30091, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithInvalidPrimaryNetworkSkip{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.20", 80, 30091, false, + "iptables rule should not be created when primary network is invalid") + }) + }) + + Context("when UDN is inactive on this node", func() { + It("should skip service sync without installing rules", func() { + service := newService(testService, testNamespace, "10.96.0.30", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30092, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithInactiveNode{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.30", 80, 30092, false, + "iptables rule should not be created when UDN is inactive on this node") + }) + }) + + Context("when UDN is active on this node", func() { + It("should install nodeport rules", func() { + // Avoid openflow dependency in this test. + config.Gateway.AllowNoUplink = true + npw.ofportPhys = "" + + service := newService(testService, testNamespace, "10.96.0.40", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30093, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + nad := ovntest.GenerateNAD("test-udn", "test-nad", testNamespace, types.Layer3Topology, "10.1.0.0/16", types.NetworkRolePrimary) + netInfo, err := util.ParseNADInfo(nad) + Expect(err).NotTo(HaveOccurred()) + npw.networkManager = &mockNetworkManagerWithActiveUDN{netInfo: netInfo} + + nodeName := npw.nodeIPManager.nodeName + epPortName := "http" + epPortValue := int32(8080) + epPortProtocol := corev1.ProtocolTCP + epSlice := &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: testService + "ab23", + Namespace: testNamespace, + Labels: map[string]string{ + types.LabelUserDefinedServiceName: testService, + }, + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: netInfo.GetNetworkName(), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{{ + Addresses: []string{"10.244.0.9"}, + NodeName: &nodeName, + }}, + Ports: []discovery.EndpointPort{{ + Name: &epPortName, + Protocol: &epPortProtocol, + Port: &epPortValue, + }}, + } + Expect(watcher.EndpointSliceInformer().GetStore().Add(epSlice)).To(Succeed()) + + err = npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.40", 80, 30093, true, + "iptables rule should be created when UDN is active on this node") + }) + }) +}) diff --git a/go-controller/pkg/node/healthcheck_service.go b/go-controller/pkg/node/healthcheck_service.go index 
dc906f4f1d..30ce4e793b 100644 --- a/go-controller/pkg/node/healthcheck_service.go +++ b/go-controller/pkg/node/healthcheck_service.go @@ -47,6 +47,8 @@ func (l *loadBalancerHealthChecker) AddService(svc *corev1.Service) error { if err := l.server.SyncServices(l.services); err != nil { return fmt.Errorf("unable to sync service %v; err: %v", name, err) } + // We can use the CDN endpointslices here and ignore UDN ones: we only need an endpoint count, + // and that count is the same for CDN and UDN. epSlices, err := l.watchFactory.GetServiceEndpointSlices(svc.Namespace, svc.Name, types.DefaultNetworkName) if err != nil { return fmt.Errorf("could not fetch endpointslices "+ diff --git a/go-controller/pkg/node/nftables/helpers.go b/go-controller/pkg/node/nftables/helpers.go index 3e8ed11ff4..07873378d6 100644 --- a/go-controller/pkg/node/nftables/helpers.go +++ b/go-controller/pkg/node/nftables/helpers.go @@ -28,7 +28,7 @@ func SetFakeNFTablesHelper() *knftables.Fake { // called, it will create a "real" knftables.Interface func GetNFTablesHelper() (knftables.Interface, error) { if nftHelper == nil { - nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName) + nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName, knftables.RequireDestroy) if err != nil { return nil, err } diff --git a/go-controller/pkg/node/nftables/testing.go b/go-controller/pkg/node/nftables/testing.go index ad377caeca..ba42069e25 100644 --- a/go-controller/pkg/node/nftables/testing.go +++ b/go-controller/pkg/node/nftables/testing.go @@ -5,34 +5,68 @@ package nftables import ( "fmt" + "slices" + "sort" "strings" "k8s.io/apimachinery/pkg/util/sets" ) // MatchNFTRules checks that the expected nftables rules match the actual ones, ignoring -// order. +// order and extra whitespace. func MatchNFTRules(expected, actual string) error { - expectedSet := sets.New(strings.Split(expected, "\n")...) - actualSet := sets.New(strings.Split(actual, "\n")...)
- - // ignore blank lines - expectedSet.Delete("") - actualSet.Delete("") - - missing := expectedSet.Difference(actualSet) - extra := actualSet.Difference(expectedSet) - + missing, extra := diffNFTRules(expected, actual) if len(missing) == 0 && len(extra) == 0 { return nil } msg := "nftables rule mismatch:" if len(missing) > 0 { - msg += fmt.Sprintf("\nMissing rules: %v\n", missing.UnsortedList()) + msg += fmt.Sprintf("\nRules missing from `nft dump ruleset`:\n%s\n", strings.Join(missing, "\n")) } if len(extra) > 0 { - msg += fmt.Sprintf("\nExtra rules: %v\n", extra.UnsortedList()) + msg += fmt.Sprintf("\nUnexpected extra rules in `nft dump ruleset`:\n%s\n", strings.Join(extra, "\n")) } return fmt.Errorf("%s", msg) } + +// helper function, for ease of unit testing +func diffNFTRules(expected, actual string) (missing, extra []string) { + expectedLines := strings.Split(expected, "\n") + expectedSet := sets.New[string]() + for _, line := range expectedLines { + line = strings.TrimSpace(line) + if line != "" { + expectedSet.Insert(line) + } + } + + actualLines := strings.Split(actual, "\n") + actualSet := sets.New[string]() + for _, line := range actualLines { + line = strings.TrimSpace(line) + if line != "" { + actualSet.Insert(line) + } + } + + missingSet := expectedSet.Difference(actualSet) + extraSet := actualSet.Difference(expectedSet) + + // While we ignore order for purposes of the comparison, it's confusing to output + // the missing/extra rules in essentially random order (and makes it harder to see + // what the problem is in cases like "the rules are basically correct, except that + // they have the wrong IP"). So we sort the `missing` rules back into the same + // order as they appeared in `expected`, and the `extra` rules into the same order + // as they appeared in `actual`. 
+ missingSorted := missingSet.UnsortedList() + sort.Slice(missingSorted, func(i, j int) bool { + return slices.Index(expectedLines, missingSorted[i]) < slices.Index(expectedLines, missingSorted[j]) + }) + extraSorted := extraSet.UnsortedList() + sort.Slice(extraSorted, func(i, j int) bool { + return slices.Index(actualLines, extraSorted[i]) < slices.Index(actualLines, extraSorted[j]) + }) + + return missingSorted, extraSorted +} diff --git a/go-controller/pkg/node/nftables/testing_test.go b/go-controller/pkg/node/nftables/testing_test.go new file mode 100644 index 0000000000..d0ce907d23 --- /dev/null +++ b/go-controller/pkg/node/nftables/testing_test.go @@ -0,0 +1,86 @@ +//go:build linux +// +build linux + +package nftables + +import ( + "reflect" + "testing" +) + +func Test_diffNFTRules(t *testing.T) { + for _, tc := range []struct { + name string + expected string + actual string + missing []string + extra []string + }{ + { + name: "empty match", + expected: "", + actual: "", + missing: []string{}, + extra: []string{}, + }, + { + name: "non-empty match", + expected: "line one\nline two\nline three\n", + actual: "line three\nline one\nline two\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "match with extra whitespace", + expected: " line one\n line two\n line three\n", + actual: "\nline three\nline one\nline two\n\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "missing lines", + expected: "line one\nline two\nline three\nline four\n", + actual: "line two\nline four\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "missing lines, alternate order", + expected: "line one\nline two\nline three\nline four\n", + actual: "line four\nline two\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "extra lines", + expected: "line two\nline four\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line three"}, + }, + { + name: "extra lines, alternate order", + expected: "line four\nline two\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line three"}, + }, + { + name: "missing and extra lines, inconsistent whitespace", + expected: " line one\n line two\n line three\n", + actual: " line two\n line two-and-a-half\nline three", + missing: []string{"line one"}, + extra: []string{"line two-and-a-half"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + missing, extra := diffNFTRules(tc.expected, tc.actual) + if !reflect.DeepEqual(tc.missing, missing) { + t.Errorf("expected missing=%#v, got %#v", tc.missing, missing) + } + if !reflect.DeepEqual(tc.extra, extra) { + t.Errorf("expected extra=%#v, got %#v", tc.extra, extra) + } + }) + } +} diff --git a/go-controller/pkg/node/nftables/util.go b/go-controller/pkg/node/nftables/util.go index 1a4a3bdd21..ce14186e9f 100644 --- a/go-controller/pkg/node/nftables/util.go +++ b/go-controller/pkg/node/nftables/util.go @@ -34,10 +34,7 @@ func DeleteNFTElements(elements []*knftables.Element) error { tx := nft.NewTransaction() for _, elem := range elements { - // We add+delete the elements, rather than just deleting them, so that if - // they weren't already in the set/map, we won't get an error on delete. 
- tx.Add(elem) - tx.Delete(elem) + tx.Destroy(elem) } return nft.Run(context.TODO(), tx) } diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index dda4e69da0..d46c758780 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -12,6 +12,7 @@ import ( "github.com/vishvananda/netlink" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -96,10 +97,10 @@ func (c *addressManager) addAddr(ipnet net.IPNet, linkIndex int) bool { // removes IP from address manager // returns true if there was an update -func (c *addressManager) delAddr(ipnet net.IPNet, linkIndex int) bool { +func (c *addressManager) delAddr(ipnet net.IPNet) bool { c.Lock() defer c.Unlock() - if c.cidrs.Has(ipnet.String()) && c.isValidNodeIP(ipnet.IP, linkIndex) { + if c.cidrs.Has(ipnet.String()) { klog.Infof("Removing IP: %s, from node IP manager", ipnet) c.cidrs.Delete(ipnet.String()) return true @@ -134,7 +135,7 @@ func (c *addressManager) Run(stopChan <-chan struct{}, doneWg *sync.WaitGroup) { return } - c.addHandlerForPrimaryAddrChange() + c.addHandlerForAddrChange() doneWg.Add(1) go func() { c.runInternal(stopChan, c.getNetlinkAddrSubFunc(stopChan)) @@ -172,7 +173,7 @@ if a.NewAddr { addrChanged = c.addAddr(a.LinkAddress, a.LinkIndex) } else { - addrChanged = c.delAddr(a.LinkAddress, a.LinkIndex) + addrChanged = c.delAddr(a.LinkAddress) } c.handleNodePrimaryAddrChange() @@ -218,14 +219,24 @@ } } -// addHandlerForPrimaryAddrChange handles reconfiguration of a node primary IP address change -func (c *addressManager) addHandlerForPrimaryAddrChange() { +// addHandlerForAddrChange handles reconfiguration when the node's primary IP address or its egress IP annotations change +func (c *addressManager) addHandlerForAddrChange() { // Add an event handler to the node informer. This is needed for cases where users first update the node's IP // address but only later update kubelet configuration and restart kubelet (which in turn will update the reported // IP address inside the node's status field). + // It is also needed when the egress IPs are updated in annotations, to keep the host-cidrs set + // consistent and free of stale egress IPs.
nodeInformer := c.watchFactory.NodeInformer() _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - UpdateFunc: func(_, _ interface{}) { + UpdateFunc: func(oldObj, newObj interface{}) { + oldNode, oldOK := oldObj.(*corev1.Node) + newNode, newOK := newObj.(*corev1.Node) + if oldOK && newOK && newNode.Name == c.nodeName && nodeEgressIPAnnotationsChanged(oldNode, newNode) { + klog.V(5).Infof("Node %s egress IP annotations changed, syncing node IP manager", c.nodeName) + c.sync() + // c.sync() already calls c.handleNodePrimaryAddrChange, so safe to return + return + } c.handleNodePrimaryAddrChange() }, }) @@ -234,6 +245,20 @@ } } +func nodeEgressIPAnnotationsChanged(oldNode, newNode *corev1.Node) bool { + if oldNode == nil || newNode == nil { + return false + } + for _, key := range []string{util.OVNNodeSecondaryHostEgressIPs, util.OVNNodeBridgeEgressIPs} { + oldVal, oldSet := oldNode.Annotations[key] + newVal, newSet := newNode.Annotations[key] + if oldSet != newSet || oldVal != newVal { + return true + } + } + return false +} + // updates OVN's EncapIP if the node IP changed func (c *addressManager) handleNodePrimaryAddrChange() { c.Lock() @@ -381,8 +406,11 @@ func (c *addressManager) nodePrimaryAddrChanged() (bool, error) { return true, nil } -// detects if the IP is valid for a node -// excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs for non-ovs type interfaces +// isValidNodeIP detects if the IP is valid for a node. +// It excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs +// for non-ovs type interfaces. +// Note that the node annotations may not be up to date when this check is executed. +// For this reason, sync is triggered on annotation change via addHandlerForAddrChange.
func (c *addressManager) isValidNodeIP(addr net.IP, linkIndex int) bool { if addr == nil { return false diff --git a/go-controller/pkg/node/node_ip_handler_linux_test.go b/go-controller/pkg/node/node_ip_handler_linux_test.go index c78307cca1..fd549ed7df 100644 --- a/go-controller/pkg/node/node_ip_handler_linux_test.go +++ b/go-controller/pkg/node/node_ip_handler_linux_test.go @@ -170,6 +170,54 @@ var _ = Describe("Node IP Handler event tests", func() { }) }) +var _ = Describe("Node IP Handler helper tests", func() { + const nodeName = "node1" + + It("removes cached IPs even when they are no longer valid node IPs", func() { + Expect(config.PrepareTestConfig()).To(Succeed()) + tc := configureKubeOVNContext(nodeName, false) + defer tc.watchFactory.Shutdown() + + tc.ipManager.Lock() + tc.ipManager.cidrs.Insert(tc.mgmtPortIP4.String()) + tc.ipManager.Unlock() + + Expect(tc.ipManager.delAddr(*tc.mgmtPortIP4)).To(BeTrue()) + _, networks := tc.ipManager.ListAddresses() + Expect(networks).To(BeEmpty()) + }) + + It("syncs stale host-cidrs when egress IP annotations change", func() { + Expect(config.PrepareTestConfig()).To(Succeed()) + tc := configureKubeOVNContext(nodeName, false) + defer tc.watchFactory.Shutdown() + + tc.ipManager.addHandlerForAddrChange() + + staleEIP := "2001:db8:abcd:1234:c001::" + node, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + + nodeToUpdate := node.DeepCopy() + nodeToUpdate.Annotations[util.OVNNodeHostCIDRs] = fmt.Sprintf("[\"%s\", \"%s\", \"%s/128\"]", "10.1.1.10/24", "2001:db8::10/64", staleEIP) + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = fmt.Sprintf("[\"%s\"]", staleEIP) + _, err = tc.fakeClient.CoreV1().Nodes().Update(context.TODO(), nodeToUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() bool { + updatedNode, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + return false + } + hostIPs, err := util.ParseNodeHostCIDRsDropNetMask(updatedNode) + if err != nil { + return false + } + return !hostIPs.Has(staleEIP) + }, 5).Should(BeTrue()) + }) +}) + var _ = Describe("Node IP Handler tests", func() { // To ensure that variables don't leak between parallel Ginkgo specs, // put all test context into a single struct and reference it via diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go index b55fff21cd..d2dc2eb82f 100644 --- a/go-controller/pkg/node/openflow_manager.go +++ b/go-controller/pkg/node/openflow_manager.go @@ -119,34 +119,40 @@ func (c *openflowManager) requestFlowSync() { func (c *openflowManager) syncFlows() { c.flowMutex.Lock() - defer c.flowMutex.Unlock() - - flows := []string{} - for _, entry := range c.flowCache { - flows = append(flows, entry...) - } + flows := flattenFlowCacheEntries(c.flowCache) + c.flowMutex.Unlock() _, stderr, err := util.ReplaceOFFlows(c.defaultBridge.GetBridgeName(), flows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.flowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.defaultBridge.GetBridgeName(), err, stderr, len(flows)) } if c.externalGatewayBridge != nil { c.exGWFlowMutex.Lock() - defer c.exGWFlowMutex.Unlock() - - flows := []string{} - for _, entry := range c.exGWFlowCache { - flows = append(flows, entry...) 
- } + exGWFlows := flattenFlowCacheEntries(c.exGWFlowCache) + c.exGWFlowMutex.Unlock() - _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), flows) + _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), exGWFlows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.exGWFlowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.externalGatewayBridge.GetBridgeName(), err, stderr, len(exGWFlows)) } } } +func flattenFlowCacheEntries(flowCache map[string][]string) []string { + flowCount := 0 + for _, entry := range flowCache { + flowCount += len(entry) + } + flows := make([]string, 0, flowCount) + for _, entry := range flowCache { + flows = append(flows, entry...) + } + return flows +} + // since we share the host's k8s node IP, add OpenFlow flows // -- to steer the NodePort traffic arriving on the host to the OVN logical topology and // -- to also connection track the outbound north-south traffic through l3 gateway so that diff --git a/go-controller/pkg/node/udn_isolation.go b/go-controller/pkg/node/udn_isolation.go index 6a24afd89d..7c41105948 100644 --- a/go-controller/pkg/node/udn_isolation.go +++ b/go-controller/pkg/node/udn_isolation.go @@ -357,7 +357,11 @@ func (m *UDNHostIsolationManager) runKubeletRestartTracker(ctx context.Context) klog.Errorf("Error closing dbus connection for UDN isolation: %v", err) } return - case signal := <-signalChan: + case signal, ok := <-signalChan: + if !ok || signal == nil { + // Channel was closed, connection is shutting down + return + } klog.V(5).Infof("D-Bus event received: %#v", signal) // Extract unit name from path unitPath := signal.Path diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index f1e14e574c..4620b7bb69 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -13,7 +13,6 @@ import ( corev1 "k8s.io/api/core/v1" knet "k8s.io/api/networking/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" @@ -125,6 +124,9 @@ type BaseNetworkController struct { // A cache of all logical ports known to the controller logicalPortCache *PortCache + // optional callback for consumers that need to react when a pod's logical + // port info is inserted/refreshed in logicalPortCache. + onLogicalPortCacheAdd func(pod *corev1.Pod, nadKey string) // Info about known namespaces. 
You must use oc.getNamespaceLocked() or // oc.waitForNamespaceLocked() to read this map, and oc.createNamespaceLocked() @@ -338,12 +340,6 @@ func (oc *BaseUserDefinedNetworkController) shouldFilterNamespace(namespace stri nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { - return false - } - if util.IsInvalidPrimaryNetworkError(err) { - return true - } return false } if nadKey == types.DefaultNetworkName { @@ -634,7 +630,7 @@ func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostS } err := libovsdbops.CreateOrUpdateLogicalSwitch(bnc.nbClient, &logicalSwitch, &logicalSwitch.OtherConfig, - &logicalSwitch.LoadBalancerGroup) + &logicalSwitch.LoadBalancerGroup, &logicalSwitch.ExternalIDs) if err != nil { return fmt.Errorf("failed to add logical switch %+v: %v", logicalSwitch, err) } @@ -1035,20 +1031,6 @@ func (bnc *BaseNetworkController) GetLocalZoneNodes() ([]*corev1.Node, error) { // isLocalZoneNode returns true if the node is part of the local zone. func (bnc *BaseNetworkController) isLocalZoneNode(node *corev1.Node) bool { - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. - if bnc.zone == types.OvnDefaultZone { - return !util.HasNodeMigratedZone(node) - } - /** HACK END **/ return util.GetNodeZone(node) == bnc.zone } @@ -1061,7 +1043,7 @@ func (bnc *BaseNetworkController) GetNetworkRole(pod *corev1.Pod) (string, error pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { bnc.recordPodErrorEvent(pod, err) } return "", err @@ -1182,11 +1164,39 @@ func (bnc *BaseNetworkController) AddResourceCommon(objType reflect.Type, obj in if !ok { return fmt.Errorf("could not cast %T object to *knet.NetworkPolicy", obj) } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) + foundNamespaceNAD, err := bnc.networkManager.GetPrimaryNADForNamespace(np.Namespace) + if err != nil { + // If this is a UDN namespace that hasn't been processed yet, the default + // controller should skip it while UDN controllers should retry. + if bnc.GetNetworkName() == types.DefaultNetworkName && util.IsInvalidPrimaryNetworkError(err) { + return nil + } + // Retry until the NAD controller has processed the primary NAD for this namespace. + return fmt.Errorf("could not get primary network NAD for namespace %s: %v", np.Namespace, err) + } + if foundNamespaceNAD == types.DefaultNetworkName { + // Only the default network controller should handle policies in default namespaces. 
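The AddResourceCommon change in progress here decides which controller owns a namespace's NetworkPolicy from the primary NAD name alone, before any active-network lookup is attempted. A reduced sketch of that routing decision, using illustrative names rather than the project's networkManager API:

```go
package main

import "fmt"

const defaultNetworkName = "default"

// ownsNamespace mirrors the routing rule: default-network namespaces are
// handled only by the default controller; UDN namespaces only by the
// controller whose network backs the namespace's primary NAD. An unknown
// NAD (missing map entry) matches no controller.
func ownsNamespace(controllerNetwork, primaryNAD string, nadToNetwork map[string]string) bool {
	if primaryNAD == defaultNetworkName {
		return controllerNetwork == defaultNetworkName
	}
	return nadToNetwork[primaryNAD] == controllerNetwork
}

func main() {
	nadToNetwork := map[string]string{"ns1/primary": "blue"}
	fmt.Println(ownsNamespace("default", "default", nadToNetwork))     // true: default ns, default controller
	fmt.Println(ownsNamespace("blue", "ns1/primary", nadToNetwork))    // true: UDN ns, owning controller
	fmt.Println(ownsNamespace("default", "ns1/primary", nadToNetwork)) // false: UDN ns, default controller skips
}
```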
+ if bnc.GetNetworkName() != types.DefaultNetworkName { + return nil + } + } else { + networkName := bnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) + if networkName == "" { + return fmt.Errorf("no primary network found for namespace %s", np.Namespace) + } + if bnc.GetNetworkName() != networkName { + return nil + } + } + netInfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) if err != nil { return fmt.Errorf("could not get active network for namespace %s: %v", np.Namespace, err) } - if bnc.GetNetworkName() != netinfo.GetNetworkName() { + if netInfo == nil { + // no active network, nothing to do + return nil + } + if bnc.GetNetworkName() != netInfo.GetNetworkName() { return nil } if err := bnc.addNetworkPolicy(np); err != nil { @@ -1207,15 +1217,6 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj if !ok { return fmt.Errorf("could not cast obj of type %T to *knet.NetworkPolicy", obj) } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(knp.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted, - // while the NotFound error occurs when the namespace no longer exists. In both cases, proceed with deleting the NetworkPolicy. - if err != nil && !util.IsInvalidPrimaryNetworkError(err) && !apierrors.IsNotFound(err) { - return fmt.Errorf("could not get active network for namespace %s: %w", knp.Namespace, err) - } - if err == nil && bnc.GetNetworkName() != netinfo.GetNetworkName() { - return nil - } return bnc.deleteNetworkPolicy(knp) default: klog.Errorf("Can not process delete resource event, object type %s is not supported", objType) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index a83097e35f..6903541f73 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -538,15 +538,6 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *corev1.Pod, nadKe if !lspExist || len(existingLSP.Options["iface-id-ver"]) != 0 { lsp.Options["iface-id-ver"] = string(pod.UID) } - // Bind the port to the node's chassis; prevents ping-ponging between - // chassis if ovnkube-node isn't running correctly and hasn't cleared - // out iface-id for an old instance of this pod, and the pod got - // rescheduled. - - if !config.Kubernetes.DisableRequestedChassis { - lsp.Options[libovsdbops.RequestedChassis] = pod.Spec.NodeName - } - // let's calculate if this network controller's role for this pod // and pass that information while determining the podAnnotations networkRole, err := bnc.GetNetworkRole(pod) @@ -559,6 +550,28 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *corev1.Pod, nadKe return nil, nil, nil, false, nil } + // Bind the port to the node's chassis. + // For IC this is required for Layer 2 networks with remote ports. + // For Legacy with OVN Central Mode it prevents ping-ponging between + // chassis if ovnkube-node isn't running correctly and hasn't cleared + // out iface-id for an old instance of this pod, and the pod got + // rescheduled. 
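The chassis-binding code that follows this comment switches requested-chassis from the node name to the chassis ID parsed off the node annotation, and suppresses the error when the annotation simply is not set yet. A rough sketch of that not-set-versus-malformed split; the annotation key and parser here are illustrative stand-ins for util.ParseNodeChassisIDAnnotation:

```go
package main

import (
	"errors"
	"fmt"
)

// chassisIDAnnotation is an illustrative key; the real constant lives in pkg/util.
const chassisIDAnnotation = "k8s.ovn.org/chassis-id"

// errAnnotationNotSet marks "annotation absent", which callers suppress and
// retry quietly; any other error is a real failure.
var errAnnotationNotSet = errors.New("annotation not set")

// parseChassisID is a stand-in for the real annotation parser.
func parseChassisID(annotations map[string]string) (string, error) {
	id, ok := annotations[chassisIDAnnotation]
	if !ok {
		return "", errAnnotationNotSet
	}
	if id == "" {
		return "", fmt.Errorf("empty %s annotation", chassisIDAnnotation)
	}
	return id, nil
}

func main() {
	for _, ann := range []map[string]string{
		{chassisIDAnnotation: "a1b2c3d4-example-chassis-uuid"},
		{}, // node not annotated yet: suppressed, will be retried
	} {
		id, err := parseChassisID(ann)
		switch {
		case errors.Is(err, errAnnotationNotSet):
			fmt.Println("chassis-id not set yet, suppressing error")
		case err != nil:
			fmt.Println("hard failure:", err)
		default:
			fmt.Println("requested-chassis =", id)
		}
	}
}
```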
+ var node *corev1.Node + if !config.Kubernetes.DisableRequestedChassis { + node, err = bnc.watchFactory.GetNode(pod.Spec.NodeName) + if err != nil { + return nil, nil, nil, false, err + } + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + return nil, nil, nil, false, ovntypes.NewSuppressedError(err) + } + return nil, nil, nil, false, err + } + lsp.Options[libovsdbops.RequestedChassis] = chassisID + } + // Although we have different code to allocate the pod annotation for the // default network and user-defined networks, at the time of this writing they // are functionally equivalent and the only reason to keep them separated is @@ -800,7 +813,8 @@ func calculateStaticMAC(podDesc string, mac string) (net.HardwareAddr, error) { } // allocatePodAnnotation and update the corresponding pod annotation. -func (bnc *BaseNetworkController) allocatePodAnnotation(pod *corev1.Pod, existingLSP *nbdb.LogicalSwitchPort, podDesc, nadKey string, network *nadapi.NetworkSelectionElement, networkRole string) (*util.PodAnnotation, bool, error) { +func (bnc *BaseNetworkController) allocatePodAnnotation(pod *corev1.Pod, existingLSP *nbdb.LogicalSwitchPort, podDesc, + nadKey string, network *nadapi.NetworkSelectionElement, networkRole string) (*util.PodAnnotation, bool, error) { var releaseIPs bool var podMac net.HardwareAddr var podIfAddrs []*net.IPNet diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index 79a46449ae..5507c23bc0 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -35,7 +35,10 @@ const ( // netpolDefaultDenyACLType is used to distinguish default deny and arp allow acls create for the same port group defaultDenyACL netpolDefaultDenyACLType = "defaultDeny" arpAllowACL netpolDefaultDenyACLType = "arpAllow" + icmpAllowACL netpolDefaultDenyACLType = "icmpAllow" + // icmpAllowPolicyMatch is the match used when creating default allow ICMP and ICMPv6 ACLs for a namespace + icmpAllowPolicyMatch = "(icmp || icmp6)" // arpAllowPolicyMatch is the match used when creating default allow ARP ACLs for a namespace arpAllowPolicyMatch = "(arp || nd)" allowHairpinningACLID = "allow-hairpinning" @@ -383,16 +386,22 @@ func (bnc *BaseNetworkController) defaultDenyPortGroupName(namespace string, acl } func (bnc *BaseNetworkController) buildDenyACLs(namespace, pgName string, aclLogging *libovsdbutil.ACLLoggingLevels, - aclDir libovsdbutil.ACLDirection) (denyACL, allowACL *nbdb.ACL) { + aclDir libovsdbutil.ACLDirection) []*nbdb.ACL { denyMatch := libovsdbutil.GetACLMatch(pgName, "", aclDir) - allowMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, aclDir) + allowARPMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, aclDir) aclPipeline := libovsdbutil.ACLDirectionToACLPipeline(aclDir) - denyACL = libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), - types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline) - allowACL = libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), - types.DefaultAllowPriority, allowMatch, nbdb.ACLActionAllow, nil, aclPipeline) - return + acls := make([]*nbdb.ACL, 0, 3) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), + types.DefaultDenyPriority, 
denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline)) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), + types.DefaultAllowPriority, allowARPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + allowICMPMatch := libovsdbutil.GetACLMatch(pgName, icmpAllowPolicyMatch, aclDir) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, icmpAllowACL), + types.DefaultAllowPriority, allowICMPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + } + return acls } func (bnc *BaseNetworkController) addPolicyToDefaultPortGroups(np *networkPolicy, aclLogging *libovsdbutil.ACLLoggingLevels) error { @@ -439,17 +448,18 @@ func (bnc *BaseNetworkController) delPolicyFromDefaultPortGroups(np *networkPoli func (bnc *BaseNetworkController) createDefaultDenyPGAndACLs(namespace, policy string, aclLogging *libovsdbutil.ACLLoggingLevels) error { ingressPGIDs := bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress) ingressPGName := libovsdbutil.GetPortGroupName(ingressPGIDs) - ingressDenyACL, ingressAllowACL := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, libovsdbutil.ACLIngress) + ingressACLs := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, libovsdbutil.ACLIngress) egressPGIDs := bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress) egressPGName := libovsdbutil.GetPortGroupName(egressPGIDs) - egressDenyACL, egressAllowACL := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) - ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), ingressDenyACL, ingressAllowACL, egressDenyACL, egressAllowACL) + egressACLs := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) + allACLs := append(ingressACLs, egressACLs...) + ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), allACLs...) 
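buildDenyACLs now returns a slice so the optional ICMP-allow ACL can travel with the deny and ARP-allow ACLs whenever the AllowICMPNetworkPolicy gate is on, and createDefaultDenyPGAndACLs passes the combined slice straight through to CreateOrUpdateACLsOps. The assembly pattern, reduced to plain match strings (illustrative, not the real ACL builder API):

```go
package main

import "fmt"

// buildMatches mirrors the deny/allow ACL assembly: always emit the
// default-deny and ARP/ND-allow matches, and append the ICMP-allow match
// only when the feature gate is enabled.
func buildMatches(pg string, allowICMP bool) []string {
	matches := make([]string, 0, 3)
	matches = append(matches, fmt.Sprintf("outport == @%s", pg))                // default deny
	matches = append(matches, fmt.Sprintf("outport == @%s && (arp || nd)", pg)) // ARP/ND allow
	if allowICMP {
		matches = append(matches, fmt.Sprintf("outport == @%s && (icmp || icmp6)", pg)) // ICMP allow
	}
	return matches
}

func main() {
	for _, m := range buildMatches("ns1_egressDefaultDeny", true) {
		fmt.Println(m)
	}
}
```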
if err != nil { return err } - ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, []*nbdb.ACL{ingressDenyACL, ingressAllowACL}) - egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, []*nbdb.ACL{egressDenyACL, egressAllowACL}) + ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, ingressACLs) + egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, egressACLs) ops, err = libovsdbops.CreateOrUpdatePortGroupsOps(bnc.nbClient, ops, ingressPG, egressPG) if err != nil { return err @@ -1307,7 +1317,6 @@ func (bnc *BaseNetworkController) deleteNetworkPolicy(policy *knet.NetworkPolicy err := bnc.networkPolicies.DoWithLock(npKey, func(npKey string) error { np, ok := bnc.networkPolicies.Load(npKey) if !ok { - klog.Infof("Deleting policy %s that is already deleted", npKey) return nil } if err := bnc.cleanupNetworkPolicy(np); err != nil { diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go b/go-controller/pkg/ovn/base_network_controller_user_defined.go index 238daee738..c5481ef265 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -274,7 +274,11 @@ func (bsnc *BaseUserDefinedNetworkController) ensurePodForUserDefinedNetwork(pod } activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) + return fmt.Errorf("failed to find active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + if activeNetwork == nil { + // no active network, pod doesn't belong to our controller + return nil } } @@ -422,6 +426,9 @@ func (bsnc *BaseUserDefinedNetworkController) addLogicalPortToNetworkForNAD(pod if lsp != nil { _ = bsnc.logicalPortCache.add(pod, switchName, nadKey, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if bsnc.onLogicalPortCacheAdd != nil { + bsnc.onLogicalPortCacheAdd(pod, nadKey) + } if bsnc.requireDHCP(pod) { if err := bsnc.ensureDHCP(pod, podAnnotation, lsp); err != nil { return err @@ -624,29 +631,18 @@ func (bsnc *BaseUserDefinedNetworkController) syncPodsForUserDefinedNetwork(pods var activeNetwork util.NetInfo var err error if bsnc.IsPrimaryNetwork() { - // check to see if the primary NAD is even applicable to our controller - foundNamespaceNAD, err := bsnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace) + activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed to get primary network namespace NAD: %w", err) + return fmt.Errorf("failed to find the active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) } - if foundNamespaceNAD == types.DefaultNetworkName { + if activeNetwork == nil || activeNetwork.IsDefault() { + // no active network for pod, or is a default network pod continue } - networkName := bsnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) - if networkName != "" && networkName != bsnc.GetNetworkName() { + if activeNetwork.GetNetworkName() != bsnc.GetNetworkName() { + // network name found but doesn't apply to our controller continue } - activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) - if err != nil { - if apierrors.IsNotFound(err) { - // namespace is gone after we listed this pod, that means the pod no longer exists - // we don't need to preserve it's previously allocated IP address or logical switch port - klog.Infof("%s network controller pod sync: pod 
%s/%s namespace has been deleted, ignoring pod", - bsnc.GetNetworkName(), pod.Namespace, pod.Name) - continue - } - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) - } } on, networkMap, err := util.GetPodNADToNetworkMappingWithActiveNetwork( @@ -823,6 +819,75 @@ func (bsnc *BaseUserDefinedNetworkController) WatchMultiNetworkPolicy() error { return nil } +// cleanupGatewayRoutersForNetworkFromDB discovers all gateway routers for the given network from +// the NB DB (by ExternalIDs and GWRouterPrefix) and cleans each one via a dummy GatewayManager. +// Used when gateway managers are empty (e.g. dummy controller or stale cleanup) so cleanup works +// even when nodes are gone. +func cleanupGatewayRoutersForNetworkFromDB( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName string, +) error { + var errs []error + networkName := netInfo.GetNetworkName() + pred := func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs[types.NetworkExternalID] == networkName && + strings.HasPrefix(lr.Name, types.GWRouterPrefix) + } + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, pred) + if err != nil { + return fmt.Errorf("failed to find gateway routers for network %s: %w", networkName, err) + } + layer2UseTransitRouter := netInfo.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter + for _, lr := range routers { + nodeName := netInfo.RemoveNetworkScopeFromName(util.GetWorkerFromGatewayRouter(lr.Name)) + gw := NewGatewayManagerForCleanup(nbClient, netInfo, clusterRouterName, joinSwitchName, lr.Name, nodeName, layer2UseTransitRouter) + if err := gw.Cleanup(); err != nil { + errs = append(errs, fmt.Errorf("failed to cleanup gateway router %s for network %q (node %s): %w", lr.Name, networkName, nodeName, err)) + } + } + return utilerrors.Join(errs...) +} + +// cleanupLoadBalancerGroups removes load balancer groups for a user-defined network controller. +// When LB group UUIDs are known (normal controller), they are deleted directly by UUID. +// Otherwise (dummy/stale cleanup controller), the groups are looked up by network-scoped name. +func cleanupLoadBalancerGroups( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID string, +) { + networkName := netInfo.GetNetworkName() + if switchLBGroupUUID != "" || clusterLBGroupUUID != "" || routerLBGroupUUID != "" { + lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) + for _, lbGroupUUID := range []string{switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID} { + if lbGroupUUID != "" { + lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) + } + } + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, lbGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", networkName, err) + } + return + } + // Dummy controller (e.g. 
stale UDN cleanup): find LB groups by network-scoped name and delete them + names := map[string]bool{ + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterSwitchLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterRouterLBGroupName): true, + } + staleLBGroups, err := libovsdbops.FindLoadBalancerGroupsWithPredicate(nbClient, func(g *nbdb.LoadBalancerGroup) bool { + return names[g.Name] + }) + if err != nil { + klog.Errorf("Failed to find load balancer groups for stale network %q: %v", networkName, err) + } else if len(staleLBGroups) > 0 { + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, staleLBGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on stale network: %q, error: %v", networkName, err) + } + } +} + // cleanupPolicyLogicalEntities cleans up all the port groups and address sets that belong to the given controller func cleanupPolicyLogicalEntities(nbClient libovsdbclient.Client, ops []ovsdb.Operation, controllerName string) ([]ovsdb.Operation, error) { var err error diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index e28c138247..5a5f7afc79 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -150,11 +150,11 @@ func (oc *BaseLayer2UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } // Add ourselves to the route import manager diff --git a/go-controller/pkg/ovn/controller/admin_network_policy/status.go b/go-controller/pkg/ovn/controller/admin_network_policy/status.go index 5ffb2fcc2d..828f159370 100644 --- a/go-controller/pkg/ovn/controller/admin_network_policy/status.go +++ b/go-controller/pkg/ovn/controller/admin_network_policy/status.go @@ -46,6 +46,22 @@ const ( policyNotReadyReason = "SetupFailed" ) +// doesStatusNeedAnUpdate compares the existing condition with the new condition +// and returns true if an update is needed, false if the status is already in the desired state. +// This helps avoid unnecessary API server calls when the status hasn't changed. 
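The layer-2 controller change above passes oc.stopChan into the goroutine as a parameter instead of closing over the struct field, so the spawned worker keeps a stable channel reference regardless of later writes to the field. A self-contained sketch of that idiom:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// worker receives its stop channel as a parameter, so the goroutine holds
// a stable reference even if the owner's field is later reassigned.
func worker(wg *sync.WaitGroup, stop <-chan struct{}) {
	defer wg.Done()
	<-stop // a receive on a closed channel returns immediately
	fmt.Println("worker stopped")
}

func main() {
	var wg sync.WaitGroup
	stop := make(chan struct{})
	wg.Add(1)
	go worker(&wg, stop) // pass the channel in, rather than closing over a mutable field
	time.Sleep(10 * time.Millisecond)
	close(stop)
	wg.Wait()
}
```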
+func doesStatusNeedAnUpdate(existingCondition *metav1.Condition, newCondition metav1.Condition) bool { + if existingCondition == nil { + return true // condition doesn't exist yet, needs to be created + } + // Check if Status, Reason, and Message are all the same - if so, no update needed + if existingCondition.Status == newCondition.Status && + existingCondition.Reason == newCondition.Reason && + existingCondition.Message == newCondition.Message { + return false + } + return true +} + // updateANPStatusToReady updates the status of the policy to reflect that it is ready // Each zone's ovnkube-controller will call this, hence let's update status using server-side-apply func (c *Controller) updateANPStatusToReady(anpName string) error { @@ -59,8 +75,6 @@ func (c *Controller) updateANPStatusToReady(anpName string) error { if err != nil { return fmt.Errorf("unable to update the status of ANP %s, err: %v", anpName, err) } - klog.V(5).Infof("Patched the status of ANP %v with condition type %v/%v", - anpName, policyReadyStatusType+c.zone, metav1.ConditionTrue) return nil } @@ -83,8 +97,6 @@ func (c *Controller) updateANPStatusToNotReady(anpName, message string) error { if err != nil { return fmt.Errorf("unable update the status of ANP %s, err: %v", anpName, err) } - klog.V(3).Infof("Patched the status of ANP %v with condition type %v/%v and reason %s/%s", - anpName, policyReadyStatusType+c.zone, metav1.ConditionFalse, policyNotReadyReason, message) return nil } @@ -94,6 +106,10 @@ func (c *Controller) updateANPZoneStatusCondition(newCondition metav1.Condition, return err } existingCondition := meta.FindStatusCondition(anp.Status.Conditions, newCondition.Type) + if !doesStatusNeedAnUpdate(existingCondition, newCondition) { + // status is already in the desired state, skip the update to reduce API server load + return nil + } if existingCondition == nil { newCondition.LastTransitionTime = metav1.NewTime(time.Now()) } else { @@ -109,6 +125,10 @@ func (c *Controller) updateANPZoneStatusCondition(newCondition metav1.Condition, WithStatus(anpapiapply.AdminNetworkPolicyStatus().WithConditions(newCondition)) _, err = c.anpClientSet.PolicyV1alpha1().AdminNetworkPolicies(). 
ApplyStatus(context.TODO(), applyObj, metav1.ApplyOptions{FieldManager: c.zone, Force: true}) + if err == nil { + klog.V(5).Infof("Patched the status of ANP %s with condition type %s/%s, reason %s, message: %s", + anpName, newCondition.Type, newCondition.Status, newCondition.Reason, newCondition.Message) + } return err } @@ -125,8 +145,6 @@ func (c *Controller) updateBANPStatusToReady(banpName string) error { if err != nil { return fmt.Errorf("unable to update the status of BANP %s, err: %v", banpName, err) } - klog.V(5).Infof("Patched the status of BANP %v with condition type %v/%v", - banpName, policyReadyStatusType+c.zone, metav1.ConditionTrue) return nil } @@ -146,8 +164,6 @@ func (c *Controller) updateBANPStatusToNotReady(banpName, message string) error if err != nil { return fmt.Errorf("unable update the status of BANP %s, err: %v", banpName, err) } - klog.V(3).Infof("Patched the status of BANP %v with condition type %v/%v and reason %s", - banpName, policyReadyStatusType+c.zone, metav1.ConditionFalse, policyNotReadyReason) return nil } @@ -157,6 +173,10 @@ func (c *Controller) updateBANPZoneStatusCondition(newCondition metav1.Condition return err } existingCondition := meta.FindStatusCondition(banp.Status.Conditions, newCondition.Type) + if !doesStatusNeedAnUpdate(existingCondition, newCondition) { + // status is already in the desired state, skip the update to reduce API server load + return nil + } if existingCondition == nil { newCondition.LastTransitionTime = metav1.NewTime(time.Now()) } else { @@ -172,5 +192,9 @@ func (c *Controller) updateBANPZoneStatusCondition(newCondition metav1.Condition WithStatus(anpapiapply.BaselineAdminNetworkPolicyStatus().WithConditions(newCondition)) _, err = c.anpClientSet.PolicyV1alpha1().BaselineAdminNetworkPolicies(). 
ApplyStatus(context.TODO(), applyObj, metav1.ApplyOptions{FieldManager: c.zone, Force: true}) + if err == nil { + klog.V(5).Infof("Patched the status of BANP %s with condition type %s/%s, reason %s, message: %s", + banpName, newCondition.Type, newCondition.Status, newCondition.Reason, newCondition.Message) + } return err } diff --git a/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go b/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go index 6a28fa60d3..02d2ece268 100644 --- a/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go +++ b/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go @@ -147,6 +147,177 @@ func newANPControllerWithDBSetup(dbSetup libovsdbtest.TestSetup, initANPs anpapi return controller, nil } +func TestDoesStatusNeedAnUpdate(t *testing.T) { + tests := []struct { + name string + existingCondition *metav1.Condition + newCondition metav1.Condition + expectedResult bool + }{ + { + name: "nil existing condition should need update", + existingCondition: nil, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: true, + }, + { + name: "same status, reason, message should not need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: false, + }, + { + name: "different status should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionFalse, + Reason: "SetupFailed", + Message: "error", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: true, + }, + { + name: "different reason should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "OldReason", + Message: "success", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "NewReason", + Message: "success", + }, + expectedResult: true, + }, + { + name: "different message should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "old message", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "new message", + }, + expectedResult: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := doesStatusNeedAnUpdate(tt.existingCondition, tt.newCondition) + if result != tt.expectedResult { + t.Errorf("doesStatusNeedAnUpdate() = %v, want %v", result, tt.expectedResult) + } + }) + } +} + +func TestStatusUpdateSkippedWhenUnchanged(t *testing.T) { + g := gomega.NewGomegaWithT(t) + controller, err := newANPController( + anpapi.AdminNetworkPolicyList{ + Items: []anpapi.AdminNetworkPolicy{initialANP}, + }, + anpapi.BaselineAdminNetworkPolicyList{ + Items: []anpapi.BaselineAdminNetworkPolicy{initialBANP}, + }, + ) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // First call - should make an API call to set status to Ready + err = 
controller.updateANPStatusToReady(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for the status to be reflected in the lister + g.Eventually(func() int { + latestANP, err := controller.anpLister.Get(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + return len(latestANP.Status.Conditions) + }).Should(gomega.Equal(1)) + + // Get the number of actions after first update + actionsAfterANPFirstUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + + // Second call with same status - should NOT make an API call + err = controller.updateANPStatusToReady(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify no new actions were added (ApplyStatus was skipped) + actionsAfterANPSecondUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterANPSecondUpdate).To(gomega.Equal(actionsAfterANPFirstUpdate), + "Expected no new API calls when status is unchanged, but got %d new actions", + actionsAfterANPSecondUpdate-actionsAfterANPFirstUpdate) + + // Third call with different status (NotReady) - SHOULD make an API call + err = controller.updateANPStatusToNotReady(initialANP.Name, "something went wrong") + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify a new action WAS added (ApplyStatus was called) + actionsAfterANPThirdUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterANPThirdUpdate).To(gomega.Equal(actionsAfterANPFirstUpdate+1), + "Expected 1 new API call when status changed to NotReady, but got %d new actions", + actionsAfterANPThirdUpdate-actionsAfterANPSecondUpdate) + + // Now test BANP + err = controller.updateBANPStatusToReady(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + g.Eventually(func() int { + latestBANP, err := controller.banpLister.Get(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + return len(latestBANP.Status.Conditions) + }).Should(gomega.Equal(1)) + + actionsAfterBANPFirstUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + + // Second call with same status - should NOT make an API call + err = controller.updateBANPStatusToReady(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + actionsAfterBANPSecondUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterBANPSecondUpdate).To(gomega.Equal(actionsAfterBANPFirstUpdate), + "Expected no new API calls when BANP status is unchanged") + + // Third call with different status (NotReady) - SHOULD make an API call + err = controller.updateBANPStatusToNotReady(initialBANP.Name, "something went wrong") + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify a new action WAS added (ApplyStatus was called) + actionsAfterBANPThirdUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterBANPThirdUpdate).To(gomega.Equal(actionsAfterBANPFirstUpdate+1), + "Expected 1 new API call when BANP status changed to NotReady, but got %d new actions", + actionsAfterBANPThirdUpdate-actionsAfterBANPSecondUpdate) +} + func TestAddOrUpdateAdminNetworkPolicyStatus(t *testing.T) { anpName := "harry-potter" banpName := "jon-snow" diff --git a/go-controller/pkg/ovn/controller/apbroute/repair.go b/go-controller/pkg/ovn/controller/apbroute/repair.go index 75c50765b1..56867f82e5 100644 --- a/go-controller/pkg/ovn/controller/apbroute/repair.go +++ b/go-controller/pkg/ovn/controller/apbroute/repair.go @@ -155,7 +155,7 @@ func (c *ExternalGatewayMasterController) 
Repair() error { // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode if !podHasAnyECMPRoutes { for _, ovnRoute := range ovnRoutes { - node := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) + node := util.GetWorkerFromGatewayRouter(ovnRoute.router) if err := c.nbClient.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { return fmt.Errorf("error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", podIP, node, err) diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go index fbcacf04b6..94f87f0ff2 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go @@ -484,7 +484,7 @@ var _ = ginkgo.Describe("OVN test basic functions", func() { subnets = append(subnets, config.CIDRNetworkEntry{CIDR: cidr}) } config.Default.ClusterSubnets = subnets - entry := &cacheEntry{} + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} output, err := efController.newEgressFirewallRule("default", tc.egressFirewallRule, tc.id, entry) if tc.err == true { gomega.Expect(err).To(gomega.HaveOccurred()) @@ -716,8 +716,13 @@ func TestValidateAndGetEgressFirewallDestination(t *testing.T) { if len(tc.udnName) > 0 { network = tc.udnName } + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} + if len(tc.udnName) > 0 { + entry.subnets = subnetsForNetInfo(netInfo) + } + cidrSelector, dnsName, clusterSubnetIntersection, nodeSelector, err := - efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination) + efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination, entry) if tc.expectedErr { require.Error(t, err) } else { diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go index d537804f56..8d9dc37403 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go @@ -110,7 +110,7 @@ type matchKind int type cacheEntry struct { pgName string hasNodeSelector bool - subnetsKey string + subnets []*net.IPNet efResourceVersion string logHash string } @@ -422,20 +422,15 @@ func (oc *EFController) sync(key string) (updateErr error) { }() activeNetwork, netErr := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if netErr != nil { - if util.IsUnprocessedActiveNetworkError(netErr) { - klog.V(5).Infof("Skipping egress firewall %s/%s: primary network not ready: %v", namespace, efName, netErr) - skipStatusUpdate = true - return nil - } - if util.IsInvalidPrimaryNetworkError(netErr) { - // Namespace requires P-UDN, but it does not exist. Remove EF config and surface error in status. - updateErr = netErr - } else { - return fmt.Errorf("failed to get active network for egress firewall %s/%s namespace: %w", - namespace, efName, netErr) - } - } else { + switch { + case netErr != nil: + // Failed to resolve active network; surface this in EF status. + updateErr = netErr + case activeNetwork == nil: + // No active network for this namespace in this controller context (e.g. filtered by D-UDN): + // cleanup stale EF config but don't report an EF status error. 
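The egress-firewall sync rewrite in progress here distinguishes three outcomes: a lookup error (surfaced in EF status), a nil active network (cleaned up quietly with no status update, as in the case that continues below), and a real network (rebuild the cache entry). A minimal sketch of that tri-state handling, with placeholder types:

```go
package main

import (
	"errors"
	"fmt"
)

type network struct{ name string }

// reconcile mirrors the sync switch: a lookup error is reported in status,
// a nil network means "not ours, clean up silently", and only a resolved
// network leads to building a fresh cache entry.
func reconcile(net *network, lookupErr error) (statusErr error, skipStatus bool) {
	switch {
	case lookupErr != nil:
		return lookupErr, false // surface in EF status
	case net == nil:
		return nil, true // cleanup path, no status update
	default:
		return nil, false // build/update the cache entry
	}
}

func main() {
	if _, skip := reconcile(nil, nil); skip {
		fmt.Println("namespace not served here: cleaning up silently")
	}
	if err, _ := reconcile(nil, errors.New("network lookup failed")); err != nil {
		fmt.Println("surfacing:", err)
	}
}
```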
+ skipStatusUpdate = true + default: aclLoggingLevels, logErr := oc.getNamespaceACLLogging(namespace) if logErr != nil { return fmt.Errorf("failed to get acl logging levels for egress firewall %s/%s: %w", @@ -444,7 +439,7 @@ func (oc *EFController) sync(key string) (updateErr error) { ownerController := activeNetwork.GetNetworkName() + "-network-controller" newEntry = &cacheEntry{ pgName: libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), - subnetsKey: subnetsKeyForNetInfo(activeNetwork), + subnets: subnetsForNetInfo(activeNetwork), efResourceVersion: ef.ResourceVersion, logHash: aclLogHash(aclLoggingLevels), } @@ -540,20 +535,19 @@ func (oc *EFController) sync(key string) (updateErr error) { return } -func subnetsKeyForNetInfo(netInfo util.NetInfo) string { +func subnetsForNetInfo(netInfo util.NetInfo) []*net.IPNet { if netInfo == nil { - return "" + return nil } subnets := netInfo.Subnets() - if len(subnets) == 0 { - return "" - } - keys := make([]string, 0, len(subnets)) - for _, s := range subnets { - keys = append(keys, s.String()) + unsortedSubnets := make([]*net.IPNet, 0, len(subnets)) + for _, subnet := range subnets { + if subnet.CIDR == nil { + continue + } + unsortedSubnets = append(unsortedSubnets, subnet.CIDR) } - slices.Sort(keys) - return strings.Join(keys, ",") + return util.CopyIPNets(unsortedSubnets) } func entriesEqual(a, b *cacheEntry) bool { @@ -564,7 +558,7 @@ func entriesEqual(a, b *cacheEntry) bool { return false default: return a.pgName == b.pgName && - a.subnetsKey == b.subnetsKey && + util.IsIPNetsEqual(a.subnets, b.subnets) && a.efResourceVersion == b.efResourceVersion && a.logHash == b.logHash } @@ -624,7 +618,7 @@ func (oc *EFController) addEgressFirewall(egressFirewall *egressfirewallapi.Egre // validateAndGetEgressFirewallDestination validates an egress firewall rule destination and returns // the parsed contents of the destination. -func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination) ( +func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination, entry *cacheEntry) ( cidrSelector string, dnsName string, clusterSubnetIntersection []*net.IPNet, @@ -644,15 +638,13 @@ func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string return "", "", nil, nil, err } cidrSelector = egressFirewallDestination.CIDRSelector - netInfo, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if err != nil { - return "", "", nil, nil, - fmt.Errorf("failed to validate egress firewall destination: %w", err) + if entry == nil || entry.subnets == nil { + return "", "", nil, nil, fmt.Errorf("failed to "+ + "validate egress firewall destination: missing cached subnets for namespace %s", namespace) } - subnets := netInfo.Subnets() - for _, clusterSubnet := range subnets { - if clusterSubnet.CIDR.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.CIDR.IP) { - clusterSubnetIntersection = append(clusterSubnetIntersection, clusterSubnet.CIDR) + for _, clusterSubnet := range entry.subnets { + if clusterSubnet.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.IP) { + clusterSubnetIntersection = append(clusterSubnetIntersection, clusterSubnet) } } } else { @@ -680,7 +672,7 @@ func (oc *EFController) newEgressFirewallRule(namespace string, rawEgressFirewal // fields of efr. 
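validateAndGetEgressFirewallDestination now intersects the rule's CIDR against the subnets cached on the entry, applying the same containment test in both directions. That check in isolation:

```go
package main

import (
	"fmt"
	"net"
)

// intersects reports whether two CIDRs overlap: since both are aligned
// network prefixes, one containing the other's base address is sufficient.
func intersects(a, b *net.IPNet) bool {
	return a.Contains(b.IP) || b.Contains(a.IP)
}

func main() {
	_, cluster, _ := net.ParseCIDR("10.128.0.0/14")
	_, dest, _ := net.ParseCIDR("10.128.1.0/24")
	_, outside, _ := net.ParseCIDR("192.168.0.0/16")
	fmt.Println(intersects(cluster, dest))    // true: dest lies inside the cluster subnet
	fmt.Println(intersects(cluster, outside)) // false: disjoint ranges
}
```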
var err error efr.to.cidrSelector, efr.to.dnsName, efr.to.clusterSubnetIntersection, efr.to.nodeSelector, err = - oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To) + oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To, entry) if err != nil { return efr, err } @@ -948,8 +940,8 @@ func (oc *EFController) moveACLsToNamespacedPortGroups(existingEFNamespaces map[ if namespace != "" && existingEFNamespaces[namespace] { pgName, err := oc.getNamespacePortGroupName(namespace) if err != nil { - return fmt.Errorf("failed to get port group name for egress firewall ACL move with "+ - "namespace: %s, err: %w", namespace, err) + klog.Warningf("Skipping egress firewall ACL move for namespace %s: %v", namespace, err) + continue } // re-attach from ClusterPortGroupNameBase to namespaced port group. // port group should exist, because namespace handler will create it. @@ -1088,11 +1080,18 @@ func getNamespacePortGroupDbIDs(ns string, controller string) *libovsdbops.DbObj } func (oc *EFController) getNamespacePortGroupName(namespace string) (string, error) { - activeNetwork, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) + nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - return "", fmt.Errorf("failed to get active network for namespace %s: %w", namespace, err) + return "", fmt.Errorf("failed to get primary NAD for namespace %s: %w", namespace, err) + } + networkName := types.DefaultNetworkName + if nadKey != types.DefaultNetworkName && nadKey != "" { + networkName = oc.networkManager.GetNetworkNameForNADKey(nadKey) + if networkName == "" { + return "", fmt.Errorf("failed to resolve network name for NAD %s in namespace %s", nadKey, namespace) + } } - ownerController := activeNetwork.GetNetworkName() + "-network-controller" + ownerController := networkName + "-network-controller" return libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), nil } diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go index eb280d5109..5c6ec709c6 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go @@ -187,5 +187,5 @@ func TestEFControllerSync_UpdatesOnSubnetChangeAndSkipsWhenUnchanged(t *testing. 
entry, ok := oc.cache.Load(namespace) require.True(t, ok) require.Equal(t, pgName, entry.pgName) - require.Equal(t, subnetsKeyForNetInfo(netInfo2), entry.subnetsKey) + require.True(t, util.IsIPNetsEqual(subnetsForNetInfo(netInfo2), entry.subnets)) } diff --git a/go-controller/pkg/ovn/controller/networkconnect/controller_test.go b/go-controller/pkg/ovn/controller/networkconnect/controller_test.go index 45884776fe..f683a38e23 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/controller_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/controller_test.go @@ -144,6 +144,7 @@ func createTestNode(n testNode) *corev1.Node { if len(n.nodeSubnets) > 0 { annotations[ovnNodeSubnetsAnnotation] = buildNodeSubnetAnnotation(n.nodeSubnets) } + annotations[util.OvnNodeChassisID] = chassisIDForNode(n.name) return &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ diff --git a/go-controller/pkg/ovn/controller/networkconnect/topology.go b/go-controller/pkg/ovn/controller/networkconnect/topology.go index 2b54570ce4..471fc0d93b 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/topology.go +++ b/go-controller/pkg/ovn/controller/networkconnect/topology.go @@ -557,8 +557,15 @@ func (c *Controller) ensureConnectPortsOps(ops []ovsdb.Operation, cnc *networkco } else { // Remote node: create only the connect-router side port with requested-chassis set // This makes the port type: remote in SB, enabling cross-zone tunneling + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + return nil, ovntypes.NewSuppressedError(err) + } + return nil, fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } ops, err = c.createRouterPortOps(ops, connectRouterName, connectPortName, portPairInfo.connectPortIPs, - "", cncName, networkID, nodeID, tunnelKey, node.Name) + "", cncName, networkID, nodeID, tunnelKey, chassisID) if err != nil { return nil, fmt.Errorf("failed to create remote connect router port ops %s: %v", connectPortName, err) } diff --git a/go-controller/pkg/ovn/controller/networkconnect/topology_test.go b/go-controller/pkg/ovn/controller/networkconnect/topology_test.go index ddb5ad624e..b182f4099e 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/topology_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/topology_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/onsi/gomega" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -32,6 +33,10 @@ import ( mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks/multinetwork" ) +func chassisIDForNode(nodeName string) string { + return uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() +} + type testNetworkManager struct { networkmanager.FakeNetworkManager nodeHas map[string]bool @@ -377,7 +382,7 @@ func TestCreateRouterPortOps(t *testing.T) { networkID: 1, nodeID: 2, tunnelKey: 101, - remoteChassisName: "node2", + remoteChassisName: chassisIDForNode("node2"), initialDB: []libovsdbtest.TestData{ &nbdb.LogicalRouter{ UUID: "router-uuid", @@ -555,6 +560,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node1", Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), util.OvnNodeZoneName: "node1", // local zone }, }, @@ -587,6 +593,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node2", // remote node Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + 
util.OvnNodeChassisID: chassisIDForNode("node2"), util.OvnNodeZoneName: "node2", // different zone }, }, @@ -619,6 +626,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node1", // local node Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), util.OvnNodeZoneName: "node1", // local zone }, }, @@ -628,6 +636,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node2", // remote node Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + util.OvnNodeChassisID: chassisIDForNode("node2"), util.OvnNodeZoneName: "node2", // different zone }, }, @@ -833,7 +842,7 @@ func TestCleanupNetworkConnections(t *testing.T) { libovsdbops.RouterNameKey.String(): "connect_router_test-cnc", }, Options: map[string]string{ - libovsdbops.RequestedChassis: "node2", + libovsdbops.RequestedChassis: chassisIDForNode("node2"), }, // Remote port has no peer }, @@ -916,6 +925,7 @@ func TestSyncNetworkConnectionsInactiveNetwork(t *testing.T) { Annotations: map[string]string{ util.OvnNodeZoneName: "zone1", util.OvnNodeID: "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), "k8s.ovn.org/node-subnets": string(subnetsBytes), }, }, @@ -1496,6 +1506,7 @@ func TestEnsureRoutingPoliciesOps(t *testing.T) { Name: "node1", Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), }, }, }, @@ -1504,6 +1515,7 @@ func TestEnsureRoutingPoliciesOps(t *testing.T) { Name: "node2", Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + util.OvnNodeChassisID: chassisIDForNode("node2"), }, }, }, diff --git a/go-controller/pkg/ovn/controller/services/node_tracker.go b/go-controller/pkg/ovn/controller/services/node_tracker.go index 341764904a..7079ba2081 100644 --- a/go-controller/pkg/ovn/controller/services/node_tracker.go +++ b/go-controller/pkg/ovn/controller/services/node_tracker.go @@ -56,10 +56,6 @@ type nodeInfo struct { // The node's zone zone string - /** HACK BEGIN **/ - // has the node migrated to remote? - migrated bool - /** HACK END **/ // The list of node's management IPs mgmtIPs []net.IP @@ -127,7 +123,6 @@ func (nt *nodeTracker) Start(nodeInformer coreinformers.NodeInformer) (cache.Res oldObj.Name != newObj.Name || util.NodeHostCIDRsAnnotationChanged(oldObj, newObj) || util.NodeZoneAnnotationChanged(oldObj, newObj) || - util.NodeMigratedZoneAnnotationChanged(oldObj, newObj) || util.NoHostSubnet(oldObj) != util.NoHostSubnet(newObj) { nt.updateNode(newObj) } @@ -154,7 +149,7 @@ func (nt *nodeTracker) Start(nodeInformer coreinformers.NodeInformer) (cache.Res // updateNodeInfo updates the node info cache, and syncs all services // if it changed. 
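The chassisIDForNode helper these tests rely on derives a stable, SHA1-based (version 5 style) UUID from the node name via uuid.NewSHA1, so fixtures and expected DB rows agree without sharing state. It runs standalone:

```go
package main

import (
	"fmt"

	"github.com/google/uuid"
)

// chassisIDForNode reproduces the test helper: a deterministic UUID derived
// from the node name, so every call site computes the same chassis ID.
func chassisIDForNode(nodeName string) string {
	return uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String()
}

func main() {
	fmt.Println(chassisIDForNode("node1"))
	fmt.Println(chassisIDForNode("node1")) // identical output: deterministic
}
```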
-func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet, mgmtIPs []net.IP, zone string, nodePortDisabled, migrated bool) { +func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet, mgmtIPs []net.IP, zone string, nodePortDisabled bool) { ni := nodeInfo{ name: nodeName, l3gatewayAddresses: l3gatewayAddresses, @@ -166,7 +161,6 @@ func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisI chassisID: chassisID, nodePortDisabled: nodePortDisabled, zone: zone, - migrated: migrated, } for i := range podSubnets { ni.podSubnets = append(ni.podSubnets, *podSubnets[i]) // de-pointer @@ -275,7 +269,6 @@ func (nt *nodeTracker) updateNode(node *corev1.Node) { mgmtIPs, util.GetNodeZone(node), !nodePortEnabled, - util.HasNodeMigratedZone(node), ) } @@ -285,24 +278,6 @@ func (nt *nodeTracker) updateNode(node *corev1.Node) { func (nt *nodeTracker) getZoneNodes() []nodeInfo { out := make([]nodeInfo, 0, len(nt.nodes)) for _, node := range nt.nodes { - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. Legacy ovnkube-master must not delete the service load - // balancers for this node till it has finished migration - if nt.zone == types.OvnDefaultZone { - if !node.migrated { - out = append(out, node) - } - continue - } - /** HACK END **/ if node.zone == nt.zone { out = append(out, node) } diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 83ccedca49..428d75324d 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -600,22 +600,36 @@ func (c *Controller) RequestFullSync(nodeInfos []nodeInfo) { // belong to the network that this service controller is responsible for. func (c *Controller) skipService(name, namespace string) bool { if util.IsNetworkSegmentationSupportEnabled() { - serviceNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace) + serviceNAD, err := c.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { + // If the namespace requires a UDN that hasn't been processed yet, the default controller + // should skip this service; the UDN controller will handle it once ready. 
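The skipService rework that continues below resolves a service's network from its namespace's primary NAD and skips anything outside the controller's network, keeping the carve-out that lets a primary-UDN controller serve UDN-enabled default-network services (the real check additionally requires shared gateway mode). A boiled-down version of that decision:

```go
package main

import "fmt"

// shouldSkip sketches the service-routing rule: a controller skips services
// on other networks, except that a primary-UDN controller still serves
// flagged default-network ("UDN-enabled") services.
func shouldSkip(serviceNetwork, controllerNetwork string, controllerIsPrimaryUDN, udnEnabledService bool) bool {
	if serviceNetwork == "default" && controllerIsPrimaryUDN && udnEnabledService {
		return false // carve-out: expose selected default-network services on the UDN
	}
	return serviceNetwork != controllerNetwork
}

func main() {
	fmt.Println(shouldSkip("default", "blue", true, true))  // false: UDN-enabled default service
	fmt.Println(shouldSkip("default", "blue", true, false)) // true: plain default service, not ours
	fmt.Println(shouldSkip("blue", "blue", true, false))    // false: service on our own network
}
```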
+ if util.IsInvalidPrimaryNetworkError(err) { + return c.netInfo.IsDefault() + } utilruntime.HandleError(fmt.Errorf("failed to retrieve network for service %s/%s: %w", namespace, name, err)) return true } + serviceNetworkName := types.DefaultNetworkName + isDefaultNetwork := serviceNAD == types.DefaultNetworkName + if !isDefaultNetwork { + serviceNetworkName = c.networkManager.GetNetworkNameForNADKey(serviceNAD) + if serviceNetworkName == "" { + return true + } + } + // Do not skip default network services enabled for UDN - if serviceNetwork.IsDefault() && + if isDefaultNetwork && c.netInfo.IsPrimaryNetwork() && globalconfig.Gateway.Mode == globalconfig.GatewayModeShared && util.IsUDNEnabledService(ktypes.NamespacedName{Namespace: namespace, Name: name}.String()) { return false } - if serviceNetwork.GetNetworkName() != c.netInfo.GetNetworkName() { + if serviceNetworkName != c.netInfo.GetNetworkName() { return true } } diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index dd27486c5b..61023878ad 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -248,6 +248,9 @@ func newDefaultNetworkControllerCommon( oc.ovnClusterLRPToJoinIfAddrs = gwLRPIfAddrs oc.initRetryFramework() + if oc.eIPC != nil { + oc.eIPC.retryEgressIPPods = oc.retryEgressIPPods + } return oc, nil } @@ -343,6 +346,9 @@ func (oc *DefaultNetworkController) Stop() { if oc.efController != nil { oc.efController.Stop() } + if oc.eIPC != nil { + oc.eIPC.StopNADReconciler() + } if oc.routeImportManager != nil { oc.routeImportManager.ForgetNetwork(oc.GetNetworkName()) } @@ -459,6 +465,9 @@ func (oc *DefaultNetworkController) run(_ context.Context) error { } if config.OVNKubernetesFeature.EnableEgressIP { + if err := oc.eIPC.StartNADReconciler(); err != nil { + return err + } // This is probably the best starting order for all egress IP handlers. 
// WatchEgressIPPods and WatchEgressIPNamespaces only use the informer // cache to retrieve the egress IPs when determining if namespace/pods @@ -764,14 +773,14 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.PodType: pod, ok := obj.(*corev1.Pod) if !ok { - return fmt.Errorf("could not cast %T object to *knet.Pod", obj) + return fmt.Errorf("could not cast %T object to *corev1.Pod", obj) } return h.oc.ensurePod(nil, pod, true) case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast %T object to *kapi.Node", obj) + return fmt.Errorf("could not cast %T object to *corev1.Node", obj) } if config.HybridOverlay.Enabled { if util.NoHostSubnet(node) { @@ -895,7 +904,7 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.NamespaceType: ns, ok := obj.(*corev1.Namespace) if !ok { - return fmt.Errorf("could not cast %T object to *kapi.Namespace", obj) + return fmt.Errorf("could not cast %T object to *corev1.Namespace", obj) } return h.oc.AddNamespace(ns) @@ -919,11 +928,11 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int case factory.NodeType: newNode, ok := newObj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast newObj of type %T to *kapi.Node", newObj) + return fmt.Errorf("could not cast newObj of type %T to *corev1.Node", newObj) } oldNode, ok := oldObj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast oldObj of type %T to *kapi.Node", oldObj) + return fmt.Errorf("could not cast oldObj of type %T to *corev1.Node", oldObj) } var switchToOvnNode bool if config.HybridOverlay.Enabled { @@ -1125,7 +1134,7 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast obj of type %T to *knet.Node", obj) + return fmt.Errorf("could not cast obj of type %T to *corev1.Node", obj) } return h.oc.deleteNodeEvent(node) diff --git a/go-controller/pkg/ovn/egressgw_test.go b/go-controller/pkg/ovn/egressgw_test.go index 9b6f4810eb..1e93c4fa6d 100644 --- a/go-controller/pkg/ovn/egressgw_test.go +++ b/go-controller/pkg/ovn/egressgw_test.go @@ -136,7 +136,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -172,7 +172,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -276,7 +276,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -312,7 +312,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 
10.128.1.3"}, }, @@ -420,7 +420,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -466,7 +466,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -898,7 +898,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -969,7 +969,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1079,7 +1079,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1119,7 +1119,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1240,7 +1240,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1280,7 +1280,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1411,7 +1411,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1451,7 +1451,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1592,7 +1592,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: 
chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1632,7 +1632,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1665,7 +1665,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1711,7 +1711,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1745,7 +1745,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1785,7 +1785,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1939,7 +1939,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1979,7 +1979,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2013,7 +2013,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2053,7 +2053,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2086,7 +2086,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2132,7 +2132,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: 
[]string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2165,7 +2165,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2211,7 +2211,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2245,7 +2245,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2285,7 +2285,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2330,7 +2330,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2370,7 +2370,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2484,7 +2484,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2618,7 +2618,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2759,7 +2759,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2951,7 +2951,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, @@ -3133,7 +3133,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 
10.128.1.3"}, @@ -3704,7 +3704,7 @@ func injectNode(fakeOvn *FakeOVN) { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{"k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"169.254.33.2/24", "next-hop":"169.254.33.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("node1"), "k8s.ovn.org/node-subnets": `{"default":"10.128.1.0/24"}`, }, }, diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 75662bd2c4..4c6850e3b2 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -22,8 +22,10 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" + "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -32,6 +34,7 @@ import ( ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" @@ -44,6 +47,7 @@ import ( addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/udnenabledsvc" + ovnretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -192,7 +196,12 @@ type EgressIPController struct { // value will be true if local to this zone and false otherwise nodeZoneState *syncmap.SyncMap[bool] // networkManager used for getting network information for UDNs - networkManager networkmanager.Interface + networkManager networkmanager.Interface + nadReconciler networkmanager.NADReconciler + nadReconcilerID uint64 + nadReconcilerRegistered bool + // retryEgressIPPods allows requeuing egressIP pod processing on NAD changes + retryEgressIPPods *ovnretry.RetryFramework // An address set factory that creates address sets addressSetFactory addressset.AddressSetFactory // Northbound database zone name to which this Controller is connected to - aka local zone @@ -233,6 +242,16 @@ func NewEIPController( v4: v4, v6: v6, } + nadReconcilerConfig := &controller.ReconcilerConfig{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Reconcile: e.syncNAD, + Threadiness: 1, + MaxAttempts: controller.InfiniteAttempts, + } + e.nadReconciler = controller.NewReconciler( + controllerName+"-egressip-nad", + nadReconcilerConfig, + ) return e } @@ -364,7 +383,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD 
reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) @@ -373,7 +400,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) @@ -399,7 +434,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) @@ -408,7 +451,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) @@ -431,7 +482,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e // reason to look at the pod selector. 
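The hunks above and below repeat a single guard in every reconcile path: resolve the namespace's active network, skip quietly when the primary-network NAD has not been processed yet (the NAD reconciler added by this PR will requeue the work), and skip when the network is not active in the local zone. A minimal sketch of that pattern as a standalone helper; activeNetworkOrSkip is hypothetical and not part of this diff, while GetActiveNetworkForNamespace and util.IsInvalidPrimaryNetworkError are the calls the diff itself uses:

// activeNetworkOrSkip is a hypothetical helper illustrating the guard pattern
// repeated throughout this file after this PR. It returns (nil, false, nil)
// when the caller should silently skip the namespace.
func (e *EgressIPController) activeNetworkOrSkip(namespace string) (util.NetInfo, bool, error) {
	ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace)
	if err != nil {
		if util.IsInvalidPrimaryNetworkError(err) {
			// NAD not processed yet; the NAD reconciler will requeue us later.
			return nil, false, nil
		}
		return nil, false, fmt.Errorf("failed to get active network for namespace %s: %w", namespace, err)
	}
	if ni == nil {
		// Our zone does not host this network; nothing to program locally.
		return nil, false, nil
	}
	return ni, true, nil
}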
 			ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name)
 			if err != nil {
-				return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err)
+				if util.IsInvalidPrimaryNetworkError(err) {
+					// NAD reconciler will notify us later
+					continue
+				}
+				return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err)
+			}
+			if ni == nil {
+				// our node does not have this network
+				continue
 			}
 			if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) {
 				if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil {
@@ -538,8 +597,16 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa
 	if namespaceSelector.Matches(oldLabels) && !namespaceSelector.Matches(newLabels) {
 		ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName)
 		if err != nil {
+			if util.IsInvalidPrimaryNetworkError(err) {
+				// NAD reconciler will notify us later
+				return nil
+			}
 			return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err)
 		}
+		if ni == nil {
+			// our node does not have this network
+			return nil
+		}
 		if err := e.deleteNamespaceEgressIPAssignment(ni, eIP.Name, eIP.Status.Items, oldNamespace, eIP.Spec.PodSelector); err != nil {
 			return fmt.Errorf("network %s: failed to delete namespace %q for egress IP %q: %w",
 				ni.GetNetworkName(), namespaceName, eIP.Name, err)
@@ -549,7 +616,15 @@
 		mark := getEgressIPPktMark(eIP.Name, eIP.Annotations)
 		ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName)
 		if err != nil {
-			return fmt.Errorf("failed to get active network for namespace %s: %v", namespaceName, err)
+			if util.IsInvalidPrimaryNetworkError(err) {
+				// NAD reconciler will notify us later
+				return nil
+			}
+			return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err)
+		}
+		if ni == nil {
+			// our node does not have this network
+			return nil
 		}
 		if err := e.addNamespaceEgressIPAssignments(ni, eIP.Name, eIP.Status.Items, mark, newNamespace, eIP.Spec.PodSelector); err != nil {
 			return fmt.Errorf("network %s: failed to add namespace %q for egress IP %q: %w",
@@ -653,15 +728,13 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err
 	if err != nil {
 		return err
 	}
-	ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name)
-	if err != nil {
-		return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err)
-	}
+	oldMatches, newMatches := false, false
+	deletePath := false
 	if !podSelector.Empty() {
 		// Use "new" and "old" instead of "newPod" and "oldPod" to determine whether
 		// the pod was created or is being deleted.
-		newMatches := new != nil && podSelector.Matches(newPodLabels)
-		oldMatches := old != nil && podSelector.Matches(oldPodLabels)
+		newMatches = new != nil && podSelector.Matches(newPodLabels)
+		oldMatches = old != nil && podSelector.Matches(oldPodLabels)
		// If the podSelector doesn't match the pod, then continue
 		// because this EgressIP intends to match other pods in that
 		// namespace and not this one. Other EgressIP objects might
@@ -671,7 +744,32 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err
 		}
 		// Check if the pod stopped matching. If the pod was deleted,
 		// "new" will be nil, so this must account for that case.
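Before reading the rewritten branch in the hunk below, it helps to see the deletePath decision in isolation. A sketch under the PR's semantics; the function and its boolean arguments are illustrative, not part of the diff:

// shouldTakeDeletePath mirrors the deletePath computation in the hunk below.
// podDeleted corresponds to new == nil in reconcileEgressIPPod.
func shouldTakeDeletePath(selectorEmpty, oldMatches, newMatches, podDeleted bool) bool {
	if selectorEmpty {
		// Empty pod selector: every pod in the namespace is matched,
		// so only an actual pod deletion can stop the match.
		return podDeleted
	}
	// Non-empty selector: the pod stopped matching on update or delete.
	return oldMatches && !newMatches
}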
-		if !newMatches && oldMatches {
+		deletePath = !newMatches && oldMatches
+	} else {
+		// Empty pod selector means all pods in namespace are matched.
+		deletePath = new == nil
+	}
+
+	ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name)
+	if err != nil && !util.IsInvalidPrimaryNetworkError(err) {
+		return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err)
+	}
+	haveNetwork := ni != nil
+	if !haveNetwork && deletePath && old != nil {
+		// During dynamic UDN churn, active network resolution can transiently come up empty on delete.
+		// Fall back to the pod-assignment cache network to avoid skipping stale egressIP cleanup.
+		if cachedNetwork := e.getNetworkFromPodAssignment(getPodKey(oldPod)); cachedNetwork != nil {
+			ni = cachedNetwork
+			haveNetwork = true
+			klog.V(4).Infof("Using cached network %q for egressIP delete reconciliation of pod %s/%s",
+				ni.GetNetworkName(), oldPod.Namespace, oldPod.Name)
+		}
+	}
+	if !haveNetwork {
+		return nil
+	}
+	if !podSelector.Empty() {
+		if deletePath {
 			if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, eIP.Name, eIP.Status.Items, oldPod); err != nil {
 				return fmt.Errorf("network %s: failed to delete pod %s/%s for egress IP %q: %w",
 					ni.GetNetworkName(), oldPod.Namespace, oldPod.Name, eIP.Name, err)
@@ -727,8 +825,14 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen
 	for _, namespace := range namespaces {
 		ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name)
 		if err != nil {
+			if util.IsInvalidPrimaryNetworkError(err) {
+				continue
+			}
 			return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err)
 		}
+		if ni == nil {
+			continue
+		}
 		if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil {
 			errs = append(errs, err)
 		}
@@ -789,7 +893,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
 	if len(statusAssignments) == 0 {
 		return nil
 	}
-	var remainingAssignments, staleAssignments []egressipv1.EgressIPStatusItem
+	var remainingAssignments, staleAssignments, reprogramAssignments []egressipv1.EgressIPStatusItem
 	nadKey, err := e.getPodNADKeyForNetwork(ni, pod)
 	if err != nil {
 		return err
 	}
@@ -816,6 +920,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
 			network: ni,
 		}
 	} else if podState.egressIPName == name || podState.egressIPName == "" {
+		podIPsChanged := !podIPSliceEqual(podState.podIPs, podIPs)
 		// We do the setup only if this egressIP object is the one serving this pod OR
 		// podState.egressIPName can be empty if no re-routes were found in
 		// syncPodAssignmentCache for the existing pod, we will treat this case as a new add
 		// (meaning it was populated during EIP sync and needs to be processed for the pod).
 		if value, exists := podState.egressStatuses.statusMap[status]; !exists || value == egressStatusStatePending {
 			remainingAssignments = append(remainingAssignments, status)
+		} else if podIPsChanged {
+			// A pod can be re-created with the same name but a different IP.
+			// Force a delete+add for existing statuses so LRP match/NAT gets updated.
+			reprogramAssignments = append(reprogramAssignments, status)
 		}
 		// Detect stale EIP status entries (same EgressIP reassigned to a different node)
 		// and queue the outdated entry for cleanup.
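The reprogram decision above hinges on podIPSliceEqual, which this diff adds near the bottom of egressip.go: the comparison is order-insensitive, so a pod that merely reports the same IPs in a different order is not reprogrammed. A self-contained usage sketch; the helper body is copied from this diff so the example runs on its own:

package main

import (
	"fmt"
	"net"
	"slices"
	"sort"
)

// podIPSliceEqual is the helper added by this PR, inlined here so the
// example is runnable on its own.
func podIPSliceEqual(oldIPs, newIPs []net.IP) bool {
	if len(oldIPs) != len(newIPs) {
		return false
	}
	oldIPStrings := make([]string, 0, len(oldIPs))
	for _, podIP := range oldIPs {
		oldIPStrings = append(oldIPStrings, podIP.String())
	}
	newIPStrings := make([]string, 0, len(newIPs))
	for _, podIP := range newIPs {
		newIPStrings = append(newIPStrings, podIP.String())
	}
	sort.Strings(oldIPStrings)
	sort.Strings(newIPStrings)
	return slices.Equal(oldIPStrings, newIPStrings)
}

func main() {
	oldIPs := []net.IP{net.ParseIP("10.128.1.3"), net.ParseIP("fd00::3")}
	reordered := []net.IP{net.ParseIP("fd00::3"), net.ParseIP("10.128.1.3")}
	recreated := []net.IP{net.ParseIP("10.128.1.9"), net.ParseIP("fd00::3")}
	fmt.Println(podIPSliceEqual(oldIPs, reordered)) // true: same set, no reprogram needed
	fmt.Println(podIPSliceEqual(oldIPs, recreated)) // false: forces delete+add of existing statuses
}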
@@ -831,7 +940,6 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
 				staleAssignments = append(staleAssignments, *staleStatus)
 			}
 		}
-		podState.podIPs = podIPs
 		podState.egressIPName = name
 		podState.network = ni
 		podState.standbyEgressIPNames.Delete(name)
@@ -865,6 +973,18 @@
 		}
 		delete(podState.egressStatuses.statusMap, staleStatus)
 	}
+	if len(reprogramAssignments) > 0 {
+		klog.V(2).Infof("Pod %s IPs changed, forcing egress IP status reprogram for statuses: %+v", podKey, reprogramAssignments)
+		if err := e.deletePodEgressIPAssignments(ni, name, reprogramAssignments, pod, false); err != nil {
+			return fmt.Errorf("failed to force reprogram of pod %s statuses %v for egress IP %s: %w",
+				podKey, reprogramAssignments, name, err)
+		}
+		for _, status := range reprogramAssignments {
+			delete(podState.egressStatuses.statusMap, status)
+		}
+		remainingAssignments = append(remainingAssignments, reprogramAssignments...)
+	}
+	podState.podIPs = podIPs
 	// We store podState into podAssignment cache at this place for two reasons.
 	// 1. When podAssignmentState is newly created.
 	// 2. deletePodEgressIPAssignments might clean the podAssignment cache, make sure we add it back.
@@ -1174,20 +1294,6 @@ func (e *EgressIPController) isPodScheduledinLocalZone(pod *corev1.Pod) bool {
 // isLocalZoneNode returns true if the node is part of the local zone.
 func (e *EgressIPController) isLocalZoneNode(node *corev1.Node) bool {
-	/** HACK BEGIN **/
-	// TODO(tssurya): Remove this HACK a few months from now. This has been added only to
-	// minimize disruption for upgrades when moving to interconnect=true.
-	// We want the legacy ovnkube-master to wait for remote ovnkube-node to
-	// signal it using "k8s.ovn.org/remote-zone-migrated" annotation before
-	// considering a node as remote when we upgrade from "global" (1 zone IC)
-	// zone to multi-zone. This is so that network disruption for the existing workloads
-	// is negligible and until the point where ovnkube-node flips the switch to connect
-	// to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure
-	// OVN/OVS flows are intact.
-	if e.zone == types.OvnDefaultZone {
-		return !util.HasNodeMigratedZone(node)
-	}
-	/** HACK END **/
 	return util.GetNodeZone(node) == e.zone
 }
@@ -1320,6 +1426,93 @@ func (e *EgressIPController) getALocalZoneNodeName() (string, error) {
 	return "", fmt.Errorf("failed to find a local OVN zone Node")
 }
+func (e *EgressIPController) StartNADReconciler() error {
+	if e.networkManager == nil || e.nadReconciler == nil {
+		return nil
+	}
+	if !e.nadReconcilerRegistered {
+		id, err := e.networkManager.RegisterNADReconciler(e.nadReconciler)
+		if err != nil {
+			return err
+		}
+		e.nadReconcilerID = id
+		e.nadReconcilerRegistered = true
+	}
+	return controller.Start(e.nadReconciler)
+}
+
+func (e *EgressIPController) StopNADReconciler() {
+	if e.nadReconciler == nil {
+		return
+	}
+	if e.nadReconcilerRegistered {
+		if err := e.networkManager.DeRegisterNADReconciler(e.nadReconcilerID); err != nil {
+			klog.Warningf("Failed to deregister egress IP NAD reconciler: %v", err)
+		}
+		e.nadReconcilerRegistered = false
+	}
+	controller.Stop(e.nadReconciler)
+	e.nadReconcilerID = 0
+	e.nadReconciler = nil
+}
+
+func (e *EgressIPController) syncNAD(key string) error {
+	startTime := time.Now()
+	klog.V(5).Infof("Egress IP NAD reconcile %s", key)
+	defer func() {
+		klog.V(4).Infof("Finished syncing Egress IP for NAD %s, took %v", key, time.Since(startTime))
+	}()
+
+	namespace, _, err := cache.SplitMetaNamespaceKey(key)
+	if err != nil {
+		klog.Errorf("Failed splitting NAD key %s: %v", key, err)
+		return nil
+	}
+
+	ni := e.networkManager.GetNetInfoForNADKey(key)
+	if ni == nil {
+		return nil
+	}
+	// Only reconcile for primary network NADs. Secondary NADs are irrelevant for EgressIP.
+	if !ni.IsPrimaryNetwork() {
+		return nil
+	}
+	// Ensure egressIP pods for this namespace are retried after NAD processing so
+	// we don't miss the UDN IPs if pod updates raced the NAD event.
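(The body of syncNAD resumes below with the retry call.) For orientation, a hedged sketch of how a caller might wire the Start/Stop pair defined above; runWithNADReconciler and its arguments are assumptions, not code from this PR:

// runWithNADReconciler shows hypothetical caller wiring; only StartNADReconciler
// and StopNADReconciler come from this diff, the rest is assumed context.
func runWithNADReconciler(eipController *EgressIPController, run func() error) error {
	if err := eipController.StartNADReconciler(); err != nil {
		return fmt.Errorf("failed to start egress IP NAD reconciler: %w", err)
	}
	defer eipController.StopNADReconciler()
	return run()
}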
+	e.addEgressIPPodRetriesForNamespace(namespace)
+	return nil
+}
+
+func (e *EgressIPController) addEgressIPPodRetriesForNamespace(namespace string) {
+	if e.retryEgressIPPods == nil {
+		return
+	}
+	pods, err := e.watchFactory.GetPods(namespace)
+	if err != nil {
+		klog.Warningf("Failed to list pods for EgressIP NAD retry in namespace %s: %v", namespace, err)
+		return
+	}
+	for _, pod := range pods {
+		pod := *pod
+		if util.PodCompleted(&pod) {
+			continue
+		}
+		e.addEgressIPPodRetry(&pod, "NAD change")
+	}
+}
+
+func (e *EgressIPController) addEgressIPPodRetry(pod *corev1.Pod, reason string) {
+	if e.retryEgressIPPods == nil || pod == nil || util.PodCompleted(pod) || !util.PodNeedsSNAT(pod) {
+		return
+	}
+	klog.V(5).Infof("Adding egress IP pod %s/%s for immediate retry due to %s", pod.Namespace, pod.Name, reason)
+	if err := e.retryEgressIPPods.AddRetryObjWithAddNoBackoff(pod); err != nil {
+		klog.Warningf("Failed to add pod %s/%s to egressIP retry queue: %v", pod.Namespace, pod.Name, err)
+		return
+	}
+	e.retryEgressIPPods.RequestRetryObjs()
+}
+
 func (e *EgressIPController) syncStaleAddressSetIPs(egressIPCache egressIPCache) error {
 	for _, networkPodCache := range egressIPCache.egressIPNameToPods {
 		for networkName, podCache := range networkPodCache {
@@ -1921,6 +2114,10 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
 			klog.Errorf("Failed to get active network for namespace %s, stale objects may remain: %v", namespace.Name, err)
 			continue
 		}
+		if ni == nil {
+			klog.V(5).Infof("Skipping namespace %s while building egress IP cache: network not active on local zone", namespace.Name)
+			continue
+		}
 		// skip if already processed
 		if _, ok := redirectCache[ni.GetNetworkName()]; ok {
 			continue
 		}
@@ -2073,6 +2270,10 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
 			klog.Errorf("Failed to get active network for namespace %s, skipping sync: %v", namespace.Name, err)
 			continue
 		}
+		if ni == nil {
+			klog.V(5).Infof("Skipping namespace %s while building egress IP sync cache: network not active on local zone", namespace.Name)
+			continue
+		}
 		_, ok := egressIPsCache[egressIP.Name][ni.GetNetworkName()]
 		if ok {
 			continue // already populated
 		}
@@ -2346,6 +2547,23 @@ func (e egressStatuses) delete(deleteStatus egressipv1.EgressIPStatusItem) {
 	delete(e.statusMap, deleteStatus)
 }
+func podIPSliceEqual(oldIPs, newIPs []net.IP) bool {
+	if len(oldIPs) != len(newIPs) {
+		return false
+	}
+	oldIPStrings := make([]string, 0, len(oldIPs))
+	for _, podIP := range oldIPs {
+		oldIPStrings = append(oldIPStrings, podIP.String())
+	}
+	newIPStrings := make([]string, 0, len(newIPs))
+	for _, podIP := range newIPs {
+		newIPStrings = append(newIPStrings, podIP.String())
+	}
+	sort.Strings(oldIPStrings)
+	sort.Strings(newIPStrings)
+	return slices.Equal(oldIPStrings, newIPStrings)
+}
+
 // podAssignmentState keeps track of which egressIP object is serving
 // the related pod.
 // NOTE: At a given time only one object will be configured.
This is diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 07d251bee3..bd4402eea0 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/google/uuid" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" "github.com/urfave/cli/v2" @@ -163,9 +164,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s\"}", ni.transitPortIP), // used only for ic=true test "k8s.ovn.org/zone-name": ni.zone, } - if ni.zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = "" - } nodes = append(nodes, getNodeObj(fmt.Sprintf("node%d", nodeSuffix), annotations, map[string]string{})) nodeSuffix = nodeSuffix + 1 } @@ -195,9 +193,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv6\":\"%s\"}", ni.transitPortIP), // used only for ic=true test "k8s.ovn.org/zone-name": ni.zone, } - if ni.zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = "" - } nodes = append(nodes, getNodeObj(fmt.Sprintf("node%d", nodeSuffix), annotations, map[string]string{})) nodeSuffix = nodeSuffix + 1 } @@ -1816,12 +1811,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/egress-assignable": "", } node2 := nodes[1] - if node1Zone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } - if node2Zone != "global" { - node2.Annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } + egressPod := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(eipNamespace) @@ -2623,9 +2613,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node1Zone, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4), } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2637,9 +2624,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4), } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } labels = map[string]string{} node2 := getNodeObj(node2Name, annotations, labels) @@ -3430,7 +3414,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\"}", v4Node1Subnet, v6Node1Subnet), "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"100.88.0.2/16\", \"ipv6\": \"fd97::2/64\"}", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4), - "k8s.ovn.org/zone-name": node1Name, + "k8s.ovn.org/zone-name": "global", } node := getNodeObj(node1Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch fakeOvn.startWithDBSetup( @@ -3619,7 +3603,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" } if !isnode1Local { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = 
"remote" // used only for ic=true test } node1 := getNodeObj(node1Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch @@ -3636,7 +3619,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" if !isnode2Local { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" // used only for ic=true test } node2 := getNodeObj(node2Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch dynamicNeighRouters := "true" @@ -4777,9 +4759,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod lives on node 1, therefore set the zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:feff:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) dynamicNeighRouters := "true" @@ -5015,9 +4994,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod is host by node 1 therefore we set its zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:fedf:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) dynamicNeighRouters := "true" @@ -5272,9 +5248,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod is hosted by node 1 therefore we set its zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:feff:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets(nil, types.DefaultNetworkName, DefaultNetworkControllerName) @@ -5602,9 +5575,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -5617,9 +5587,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } node2 := getNodeObj(node2Name, annotations, labels) _, node2Subnet, _ := net.ParseCIDR(v4Node2Subnet) @@ -7076,7 +7043,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -7429,7 +7396,7 @@ var _ = 
ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -7534,9 +7501,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -7550,9 +7514,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } node2 := getNodeObj(node2Name, annotations, map[string]string{}) eIP1 := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -7856,7 +7817,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -11676,7 +11637,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" } if isPodRemote { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" } node2 := getNodeObj(node2Name, annotations, map[string]string{}) @@ -15722,10 +15682,20 @@ func getReRouteStaticRoute(clusterSubnet, nextHop string) *nbdb.LogicalRouterSta } func getNodeObj(nodeName string, annotations, labels map[string]string) corev1.Node { + nodeAnnotations := map[string]string{} + if annotations != nil { + nodeAnnotations = make(map[string]string, len(annotations)+1) + for k, v := range annotations { + nodeAnnotations[k] = v + } + } + if _, ok := nodeAnnotations[util.OvnNodeChassisID]; !ok { + nodeAnnotations[util.OvnNodeChassisID] = uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() + } return corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, - Annotations: annotations, + Annotations: nodeAnnotations, Labels: labels, }, Status: corev1.NodeStatus{ diff --git a/go-controller/pkg/ovn/egressip_udn_l2_test.go b/go-controller/pkg/ovn/egressip_udn_l2_test.go index a31dcac086..1b4d845800 100644 --- a/go-controller/pkg/ovn/egressip_udn_l2_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l2_test.go @@ -550,7 +550,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -567,7 +566,6 @@ var _ = 
ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1082,7 +1080,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1099,7 +1096,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1588,7 +1584,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1605,7 +1600,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1968,7 +1962,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ 
-1985,7 +1978,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2337,7 +2329,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2354,7 +2345,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2723,7 +2713,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2740,7 +2729,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, diff --git a/go-controller/pkg/ovn/egressip_udn_l3_test.go b/go-controller/pkg/ovn/egressip_udn_l3_test.go index 3137457a43..a8a50a3724 100644 --- a/go-controller/pkg/ovn/egressip_udn_l3_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l3_test.go @@ -41,26 +41,25 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ) const ( - nadName1 = "nad1" - networkName1 = "network1" - networkName1_ = networkName1 + "_" - node1Name = "node1" - v4Net1 = "20.128.0.0/14" - v4Node1Net1 = "20.128.0.0/16" - v4Pod1IPNode1Net1 = "20.128.0.5" - podName3 = 
"egress-pod3" - v4Pod2IPNode1Net1 = "20.128.0.6" - v4Node1Tsp = "100.88.0.2" - node2Name = "node2" - v4Node2Net1 = "20.129.0.0/16" - v4Node2Tsp = "100.88.0.3" - podName4 = "egress-pod4" - v4Pod1IPNode2Net1 = "20.129.0.2" - v4Pod2IPNode2Net1 = "20.129.0.3" - eIP1Mark = 50000 - eIP2Mark = 50001 - userDefinedNetworkID = "2" - //tnlKey = zoneinterconnect.BaseTransitSwitchTunnelKey + userDefinedNetworkID + nadName1 = "nad1" + networkName1 = "network1" + networkName1_ = networkName1 + "_" + node1Name = "node1" + v4Net1 = "20.128.0.0/14" + v4Node1Net1 = "20.128.0.0/16" + v4Pod1IPNode1Net1 = "20.128.0.5" + podName3 = "egress-pod3" + v4Pod2IPNode1Net1 = "20.128.0.6" + v4Node1Tsp = "100.88.0.2" + node2Name = "node2" + v4Node2Net1 = "20.129.0.0/16" + v4Node2Tsp = "100.88.0.3" + podName4 = "egress-pod4" + v4Pod1IPNode2Net1 = "20.129.0.2" + v4Pod2IPNode2Net1 = "20.129.0.3" + eIP1Mark = 50000 + eIP2Mark = 50001 + // tnlKey = zoneinterconnect.BaseTransitSwitchTunnelKey + userDefinedNetworkID tnlKey = "16711685" ) @@ -169,6 +168,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -181,6 +181,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMetaWithMark(egressIPName, eIP1Mark), @@ -262,7 +263,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{}, }, } @@ -474,7 +475,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -543,10 +544,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": 
"", } @@ -556,10 +557,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -639,7 +640,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -852,7 +853,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -990,7 +991,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -1064,10 +1065,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -1077,11 +1078,33 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - 
"k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) + gwConfig, err := util.ParseNodeL3GatewayAnnotation(&node1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + rtosPortName := "rtos-" + networkName1_ + node1Name + rtosPortUUID := rtosPortName + "-UUID" + rtosChassisName := rtosPortName + "-" + node1.Annotations[util.OvnNodeChassisID] + rtosChassisUUID := rtosChassisName + "-UUID" + rtosPort := &nbdb.LogicalRouterPort{ + UUID: rtosPortUUID, + Name: rtosPortName, + MAC: util.IPAddrToHWAddr(util.GetNodeGatewayIfAddr(node1UDNSubnet).IP).String(), + Networks: []string{util.GetNodeGatewayIfAddr(node1UDNSubnet).String()}, + Options: map[string]string{ + "gateway_mtu": fmt.Sprintf("%d", config.Default.MTU), + }, + GatewayChassis: []string{rtosChassisUUID}, + } + rtosGatewayChassis := &nbdb.GatewayChassis{ + UUID: rtosChassisUUID, + Name: rtosChassisName, + ChassisName: node1.Annotations[util.OvnNodeChassisID], + Priority: 1, + } twoNodeStatus := []egressipv1.EgressIPStatusItem{ { Node: node1Name, @@ -1160,7 +1183,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -1394,17 +1417,20 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -1437,7 +1463,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + 
ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -1466,7 +1492,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: "remote", @@ -1496,7 +1522,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateTwoEgressNodes)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseStateTwoEgressNodes)) ginkgo.By("delete EgressIP") err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1646,14 +1672,17 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: secConInfo.bnc.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -1686,7 +1715,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -1715,7 +1744,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: "remote", @@ -1745,7 +1774,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected 
equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseState)) return nil } err := app.Run([]string{app.Name}) @@ -1800,10 +1829,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -1813,10 +1842,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -1896,7 +1925,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2106,7 +2135,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -2115,7 +2144,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateTwoEgressNodes)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseStateTwoEgressNodes)) return nil } err := app.Run([]string{app.Name}) @@ -2170,10 +2199,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - 
"k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2183,10 +2212,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -2266,7 +2295,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2464,7 +2493,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -2531,10 +2560,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2544,11 +2573,33 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) + gwConfig, err := util.ParseNodeL3GatewayAnnotation(&node1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + rtosPortName := "rtos-" + networkName1_ + 
node1Name + rtosPortUUID := rtosPortName + "-UUID" + rtosChassisName := rtosPortName + "-" + node1.Annotations[util.OvnNodeChassisID] + rtosChassisUUID := rtosChassisName + "-UUID" + rtosPort := &nbdb.LogicalRouterPort{ + UUID: rtosPortUUID, + Name: rtosPortName, + MAC: util.IPAddrToHWAddr(util.GetNodeGatewayIfAddr(node1UDNSubnet).IP).String(), + Networks: []string{util.GetNodeGatewayIfAddr(node1UDNSubnet).String()}, + Options: map[string]string{ + "gateway_mtu": fmt.Sprintf("%d", config.Default.MTU), + }, + GatewayChassis: []string{rtosChassisUUID}, + } + rtosGatewayChassis := &nbdb.GatewayChassis{ + UUID: rtosChassisUUID, + Name: rtosChassisName, + ChassisName: node1.Annotations[util.OvnNodeChassisID], + Priority: 1, + } twoNodeStatus := []egressipv1.EgressIPStatusItem{ { Node: node1Name, @@ -2627,7 +2678,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2869,16 +2920,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -2911,7 +2965,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -2940,7 +2994,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - 
libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: "remote", @@ -2980,6 +3034,14 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }) }) +func addL3GatewayConfig(annotations map[string]string, nodeIPv4CIDR, mac string) { + annotations["k8s.ovn.org/l3-gateway-config"] = fmt.Sprintf( + `{"default":{"mode":"local","mac-address":%q, "ip-address":%q, "next-hop":"192.168.126.1"}}`, + mac, + nodeIPv4CIDR, + ) +} + // returns the address set with externalID "k8s.ovn.org/name": "egressip-served-pods"" func buildEgressIPServedPodsAddressSetsForController(ips []string, network, controller string) (*nbdb.AddressSet, *nbdb.AddressSet) { dbIDs := getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, network, controller) diff --git a/go-controller/pkg/ovn/egressservices_test.go b/go-controller/pkg/ovn/egressservices_test.go index 25412c13d5..612941a338 100644 --- a/go-controller/pkg/ovn/egressservices_test.go +++ b/go-controller/pkg/ovn/egressservices_test.go @@ -1700,6 +1700,7 @@ func nodeFor(name, ipv4, ipv6, v4subnet, v6subnet, transitIPv4, transitIPv6 stri "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", ipv4, ipv6), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\",\"%s\"]", fmt.Sprintf("%s/24", ipv4), fmt.Sprintf("%s/64", ipv6)), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\",\"%s\"]}", v4subnet, v6subnet), + util.OvnNodeChassisID: chassisIDForNode(name), // Used only with IC tests "k8s.ovn.org/zone-name": name, diff --git a/go-controller/pkg/ovn/external_gateway_apb_test.go b/go-controller/pkg/ovn/external_gateway_apb_test.go index b237174ae0..2e91281041 100644 --- a/go-controller/pkg/ovn/external_gateway_apb_test.go +++ b/go-controller/pkg/ovn/external_gateway_apb_test.go @@ -178,7 +178,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -214,7 +214,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -324,7 +324,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -360,7 +360,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -463,7 +463,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: 
[]string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -499,7 +499,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -606,7 +606,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -652,7 +652,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -814,7 +814,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -896,7 +896,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1039,7 +1039,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, }, @@ -1167,7 +1167,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1238,7 +1238,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1339,7 +1339,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1375,7 +1375,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1481,7 +1481,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: 
map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1517,7 +1517,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1643,7 +1643,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1679,7 +1679,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1797,7 +1797,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1833,7 +1833,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1860,7 +1860,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1902,7 +1902,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1992,7 +1992,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2112,7 +2112,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2243,7 +2243,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: 
[]string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2285,7 +2285,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2341,7 +2341,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2539,7 +2539,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index ddce0de5c7..a961d301c5 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -1381,6 +1381,37 @@ func (gw *GatewayManager) Cleanup() error { return nil } +// NewGatewayManagerForCleanup returns a minimal GatewayManager used only for Cleanup(). Used when +// discovering gateway routers from the DB (e.g. stale cleanup when nodes are gone). layer2UseTransitRouter +// selects the peer port cleanup path (transit router LRP vs join switch LSP). +// +// NOTE: transitRouterInfo is set to an empty struct (not nil) when layer2UseTransitRouter is true. +// This is safe because Cleanup() only checks (transitRouterInfo != nil) to choose between +// deleteGWRouterPeerRouterPort and deleteGWRouterPeerSwitchPort — neither of which accesses +// transitRouterInfo fields. If Cleanup() is ever changed to dereference transitRouterInfo fields, +// this constructor must be updated accordingly. +func NewGatewayManagerForCleanup( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName, gwRouterName, nodeName string, + layer2UseTransitRouter bool, +) *GatewayManager { + var tri *transitRouterInfo + if layer2UseTransitRouter { + tri = &transitRouterInfo{} + } + return &GatewayManager{ + nodeName: nodeName, + clusterRouterName: clusterRouterName, + gwRouterName: gwRouterName, + extSwitchName: netInfo.GetNetworkScopedExtSwitchName(nodeName), + joinSwitchName: joinSwitchName, + nbClient: nbClient, + netInfo: netInfo, + transitRouterInfo: tri, + } +} + func (gw *GatewayManager) delPbrAndNatRules(nodeName string) { // delete the dnat_and_snat entry that we added for the management port IP // Note: we don't need to delete any MAC bindings that are dynamically learned from OVN SB DB diff --git a/go-controller/pkg/ovn/gress_policy.go b/go-controller/pkg/ovn/gress_policy.go index ad20fadfb3..b1f844123c 100644 --- a/go-controller/pkg/ovn/gress_policy.go +++ b/go-controller/pkg/ovn/gress_policy.go @@ -22,6 +22,11 @@ import ( const ( // emptyIdx is used to create ACL for gressPolicy that doesn't have ipBlocks emptyIdx = -1 + // ipBlockCombinedIdx is used when creating an ACL for a gressPolicy + // that contains ipBlocks. Previously, one ACL was created per ipBlock. + // This is changed to create a single combined ACL for all ipBlocks, + // and this special index value identifies those new ACLs. 
+ ipBlockCombinedIdx = -2 ) type gressPolicy struct { @@ -167,14 +172,14 @@ func (gp *gressPolicy) allIPsMatch() string { } } -func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) []string { +func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) string { var direction string if gp.policyType == knet.PolicyTypeIngress { direction = "src" } else { direction = "dst" } - var matchStrings []string + var ipBlockMatches []string var matchStr, ipVersion string for _, ipBlock := range gp.ipBlocks { if utilnet.IsIPv6CIDRString(ipBlock.CIDR) { @@ -185,17 +190,22 @@ func (gp *gressPolicy) getMatchFromIPBlock(lportMatch, l4Match string) []string if len(ipBlock.Except) == 0 { matchStr = fmt.Sprintf("%s.%s == %s", ipVersion, direction, ipBlock.CIDR) } else { - matchStr = fmt.Sprintf("%s.%s == %s && %s.%s != {%s}", ipVersion, direction, ipBlock.CIDR, + matchStr = fmt.Sprintf("(%s.%s == %s && %s.%s != {%s})", ipVersion, direction, ipBlock.CIDR, ipVersion, direction, strings.Join(ipBlock.Except, ", ")) } - if l4Match == libovsdbutil.UnspecifiedL4Match { - matchStr = fmt.Sprintf("%s && %s", matchStr, lportMatch) - } else { - matchStr = fmt.Sprintf("%s && %s && %s", matchStr, l4Match, lportMatch) - } - matchStrings = append(matchStrings, matchStr) + ipBlockMatches = append(ipBlockMatches, matchStr) } - return matchStrings + var l3Match string + if len(ipBlockMatches) == 1 { + l3Match = ipBlockMatches[0] + } else { + l3Match = fmt.Sprintf("(%s)", strings.Join(ipBlockMatches, " || ")) + } + + if l4Match == libovsdbutil.UnspecifiedL4Match { + return fmt.Sprintf("%s && %s", l3Match, lportMatch) + } + return fmt.Sprintf("%s && %s && %s", l3Match, l4Match, lportMatch) } // addNamespaceAddressSet adds a namespace address set to the gress policy. @@ -285,13 +295,11 @@ func (gp *gressPolicy) buildLocalPodACLs(portGroupName string, aclLogging *libov for protocol, l4Match := range libovsdbutil.GetL4MatchesFromNetworkPolicyPorts(gp.portPolicies) { if len(gp.ipBlocks) > 0 { // Add ACL allow rule for IPBlock CIDR - ipBlockMatches := gp.getMatchFromIPBlock(lportMatch, l4Match) - for ipBlockIdx, ipBlockMatch := range ipBlockMatches { - aclIDs := gp.getNetpolACLDbIDs(ipBlockIdx, protocol) - acl := libovsdbutil.BuildACLWithDefaultTier(aclIDs, types.DefaultAllowPriority, ipBlockMatch, action, - aclLogging, gp.aclPipeline) - createdACLs = append(createdACLs, acl) - } + ipBlockMatch := gp.getMatchFromIPBlock(lportMatch, l4Match) + aclIDs := gp.getNetpolACLDbIDs(ipBlockCombinedIdx, protocol) + acl := libovsdbutil.BuildACLWithDefaultTier(aclIDs, types.DefaultAllowPriority, ipBlockMatch, action, + aclLogging, gp.aclPipeline) + createdACLs = append(createdACLs, acl) } // if there are pod/namespace selector, then allow packets from/to that address_set or // if the NetworkPolicyPeer is empty, then allow from all sources or to all destinations. 
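For reference, the combined match built above is easiest to check with concrete values. Below is a minimal, self-contained sketch of the same string-building logic (assumptions: IPv4 ingress only, a plain function in place of the gressPolicy method, and an empty string standing in for libovsdbutil.UnspecifiedL4Match); it prints the exact string the updated "multiple IPv4 with except" test case expects.

package main

import (
	"fmt"
	"strings"
)

// ipBlock mirrors the shape of knet.IPBlock for this sketch.
type ipBlock struct {
	CIDR   string
	Except []string
}

// combinedIPBlockMatch builds one ACL match covering all ipBlocks by OR-ing
// the per-block clauses, instead of emitting one ACL per block.
func combinedIPBlockMatch(blocks []ipBlock, l4Match, lportMatch string) string {
	clauses := make([]string, 0, len(blocks))
	for _, b := range blocks {
		if len(b.Except) == 0 {
			clauses = append(clauses, fmt.Sprintf("ip4.src == %s", b.CIDR))
		} else {
			clauses = append(clauses, fmt.Sprintf("(ip4.src == %s && ip4.src != {%s})",
				b.CIDR, strings.Join(b.Except, ", ")))
		}
	}
	l3Match := clauses[0]
	if len(clauses) > 1 {
		l3Match = fmt.Sprintf("(%s)", strings.Join(clauses, " || "))
	}
	if l4Match == "" { // stands in for libovsdbutil.UnspecifiedL4Match
		return fmt.Sprintf("%s && %s", l3Match, lportMatch)
	}
	return fmt.Sprintf("%s && %s && %s", l3Match, l4Match, lportMatch)
}

func main() {
	blocks := []ipBlock{
		{CIDR: "0.0.0.0/0", Except: []string{"10.1.0.0/16"}},
		{CIDR: "10.1.0.0/16"},
	}
	// Output:
	// ((ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) || ip4.src == 10.1.0.0/16) && input && fake
	fmt.Println(combinedIPBlockMatch(blocks, "input", "fake"))
}

With this change there is at most one ipBlock ACL per gress rule and protocol, identified by the ipBlockCombinedIdx sentinel above.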
@@ -334,10 +342,10 @@ func (gp *gressPolicy) getNetpolACLDbIDs(ipBlockIdx int, protocol string) *libov // gress rule index libovsdbops.GressIdxKey: strconv.Itoa(gp.idx), // acls are created for every gp.portPolicies which are grouped by protocol: - // - for empty policy (no selectors and no ip blocks) - empty ACL + // - for empty policy (no selectors and no ip blocks) - empty ACL with idx=emptyIdx (-1) // OR - // - all selector-based peers ACL - // - for every IPBlock +1 ACL + // - all selector-based peers ACL with idx=emptyIdx (-1) + // - all ipBlocks combined into a single ACL with idx=ipBlockCombinedIdx (-2) // Therefore unique id for a given gressPolicy is protocol name + IPBlock idx // (protocol will be "None" if no port policy is defined, and empty policy and all // selector-based peers ACLs will have idx=-1) diff --git a/go-controller/pkg/ovn/gress_policy_test.go b/go-controller/pkg/ovn/gress_policy_test.go index 14b2a65a7c..f45be5385a 100644 --- a/go-controller/pkg/ovn/gress_policy_test.go +++ b/go-controller/pkg/ovn/gress_policy_test.go @@ -16,7 +16,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { ipBlocks []*knet.IPBlock lportMatch string l4Match string - expected []string + expected string }{ { desc: "IPv4 only no except", @@ -27,7 +27,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && input && fake"}, + expected: "ip4.src == 0.0.0.0/0 && input && fake", }, { desc: "multiple IPv4 only no except", @@ -41,8 +41,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && input && fake", - "ip4.src == 10.1.0.0/16 && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 || ip4.src == 10.1.0.0/16) && input && fake", }, { desc: "IPv6 only no except", @@ -53,7 +52,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip6.src == fd00:10:244:3::49/32 && input && fake"}, + expected: "ip6.src == fd00:10:244:3::49/32 && input && fake", }, { desc: "mixed IPv4 and IPv6 no except", @@ -67,8 +66,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip6.src == ::/0 && input && fake", - "ip4.src == 0.0.0.0/0 && input && fake"}, + expected: "(ip6.src == ::/0 || ip4.src == 0.0.0.0/0) && input && fake", }, { desc: "IPv4 only with except", @@ -80,7 +78,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) && input && fake", }, { desc: "multiple IPv4 with except", @@ -95,8 +93,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake", - "ip4.src == 10.1.0.0/16 && input && fake"}, + expected: "((ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) || ip4.src == 10.1.0.0/16) && input && fake", }, { desc: "IPv4 with IPv4 except", @@ -108,7 +105,7 @@ func TestGetMatchFromIPBlock(t *testing.T) { }, lportMatch: "fake", l4Match: "input", - expected: []string{"ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16} && input && fake"}, + expected: "(ip4.src == 0.0.0.0/0 && ip4.src != {10.1.0.0/16}) && input && fake", }, } diff --git a/go-controller/pkg/ovn/hybrid.go b/go-controller/pkg/ovn/hybrid.go index 0164cc4076..d9a5610940 100644 --- 
a/go-controller/pkg/ovn/hybrid.go +++ b/go-controller/pkg/ovn/hybrid.go @@ -279,7 +279,7 @@ func (oc *DefaultNetworkController) setupHybridLRPolicySharedGw(nodeSubnets []*n }, &clusterRouterStaticRoutes.Nexthop); err != nil { return fmt.Errorf("failed to add policy route static '%s %s' for on %s , error: %w", clusterRouterStaticRoutes.IPPrefix, clusterRouterStaticRoutes.Nexthop, - oc.GetNetworkScopedGWRouterName(nodeName), err) + ovntypes.OVNClusterRouter, err) } klog.Infof("Created hybrid overlay logical route static route at cluster router for node %s", nodeName) diff --git a/go-controller/pkg/ovn/kubevirt_test.go b/go-controller/pkg/ovn/kubevirt_test.go index 5f2ce57b3f..9aad857d9c 100644 --- a/go-controller/pkg/ovn/kubevirt_test.go +++ b/go-controller/pkg/ovn/kubevirt_test.go @@ -748,7 +748,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node1].transitSwitchPortIPv4, nodeByName[node1].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node1].subnetIPv4, nodeByName[node1].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:51", "ip-addresses":[%q, %q]}}`, nodeByName[node1].addressIPv4, nodeByName[node1].addressIPv6), - "k8s.ovn.org/node-chassis-id": "1", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node1), util.OvnNodeID: nodeByName[node1].nodeID, }, }, @@ -760,7 +760,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node2].transitSwitchPortIPv4, nodeByName[node2].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node2].subnetIPv4, nodeByName[node2].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:52", "ip-addresses":[%q, %q]}}`, nodeByName[node2].addressIPv4, nodeByName[node2].addressIPv6), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node2), util.OvnNodeID: nodeByName[node2].nodeID, }, }, @@ -772,7 +772,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node3].transitSwitchPortIPv4, nodeByName[node3].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node3].subnetIPv4, nodeByName[node3].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:53", "ip-addresses":[%q, %q]}}`, nodeByName[node3].addressIPv4, nodeByName[node3].addressIPv6), - "k8s.ovn.org/node-chassis-id": "3", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node3), util.OvnNodeID: nodeByName[node3].nodeID, }, }, diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index ff9ef2a4b4..3649153b41 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -398,6 +398,11 @@ func NewLayer2UserDefinedNetworkController( eIPController: eIPController, remoteNodesNoRouter: sync.Map{}, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical 
port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneinterconnect.NewZoneInterconnectHandler(oc.GetNetInfo(), oc.nbClient, oc.sbClient, oc.watchFactory) @@ -490,6 +495,20 @@ func (oc *Layer2UserDefinedNetworkController) run() error { // could be called from a dummy Controller (only has CommonNetworkControllerInfo set) func (oc *Layer2UserDefinedNetworkController) Cleanup() error { networkName := oc.GetNetworkName() + + // For primary Layer2 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. + if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", networkName, err) + } + } + + // Switch that holds management ports is deleted below (BaseLayer2UserDefinedNetworkController.cleanup); + // LSPs are cascade-deleted with the logical switch. if err := oc.BaseLayer2UserDefinedNetworkController.cleanup(); err != nil { return fmt.Errorf("failed to cleanup network %q: %w", networkName, err) } @@ -526,13 +545,8 @@ func (oc *Layer2UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } @@ -817,9 +831,19 @@ func (oc *Layer2UserDefinedNetworkController) addSwitchPortForRemoteNodeGR(node return fmt.Errorf("failed to fetch tunnelID annotation from the node %s for network %s, err: %w", node.Name, oc.GetNetworkName(), err) } + + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } + logicalSwitchPort.Options = map[string]string{ libovsdbops.RequestedTnlKey: strconv.Itoa(tunnelID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, } sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(types.OVNLayer2Switch)} err = libovsdbops.CreateOrUpdateLogicalSwitchPortsOnSwitch(oc.nbClient, &sw, &logicalSwitchPort) @@ -889,13 +913,23 @@ func (oc *Layer2UserDefinedNetworkController) addRouterSetupForRemoteNodeGR(node if err != nil { return nil } + + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", 
node.Name, err) + } + transitPort := nbdb.LogicalRouterPort{ Name: types.TransitRouterToRouterPrefix + oc.GetNetworkScopedGWRouterName(node.Name), MAC: util.IPAddrToHWAddr(transitRouterInfo.transitRouterNets[0].IP).String(), Networks: util.IPNetsToStringSlice(transitRouterInfo.transitRouterNets), Options: map[string]string{ libovsdbops.RequestedTnlKey: getTransitRouterPortTunnelKey(transitRouterInfo.nodeID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, }, ExternalIDs: map[string]string{ types.NetworkExternalID: oc.GetNetworkName(), @@ -992,10 +1026,12 @@ func (oc *Layer2UserDefinedNetworkController) cleanupRouterSetupForRemoteNodeGR( func (oc *Layer2UserDefinedNetworkController) deleteNodeEvent(node *corev1.Node) error { if _, local := oc.localZoneNodes.Load(node.Name); local { - if err := oc.gatewayManagerForNode(node.Name).Cleanup(); err != nil { - return fmt.Errorf("failed to cleanup gateway on node %q: %w", node.Name, err) + if util.IsNetworkSegmentationSupportEnabled() && oc.IsPrimaryNetwork() { + if err := oc.gatewayManagerForNode(node.Name).Cleanup(); err != nil { + return fmt.Errorf("failed to cleanup gateway on node %q: %w", node.Name, err) + } + oc.gatewayManagers.Delete(node.Name) } - oc.gatewayManagers.Delete(node.Name) } else { if config.Layer2UsesTransitRouter { // this is a no-op for local nodes diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go index 947dfbfea9..32cfaca26b 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go @@ -135,7 +135,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, []testPod{podInfo}, expectationOptions..., - ).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...)) + ).expectedLogicalSwitchesAndPorts()...)) return nil } @@ -266,7 +266,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, []testPod{sourcePodInfo}, expectationOptions..., - ).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...)) + ).expectedLogicalSwitchesAndPorts()...)) targetPodInfo := dummyL2TestPod(ns, netInfo, targetPodInfoIdx, userDefinedNetworkIdx) targetKvPod := newMultiHomedKubevirtPod( @@ -293,7 +293,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, testPods, expectationOptions..., - ).expectedLogicalSwitchesAndPortsWithLspEnabled(netInfo.isPrimary, expectedPodLspEnabled)...)) + ).expectedLogicalSwitchesAndPortsWithLspEnabled(expectedPodLspEnabled)...)) return nil } @@ -365,13 +365,8 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { "user-defined network controller DB entities are properly cleaned up", func(netInfo userDefinedNetInfo, testConfig testConfiguration) { podInfo := dummyTestPod(ns, netInfo) - if testConfig.configToOverride != nil { - config.OVNKubernetesFeature = *testConfig.configToOverride - if testConfig.gatewayConfig != nil { - config.Gateway.DisableSNATMultipleGWs = testConfig.gatewayConfig.DisableSNATMultipleGWs - } - config.OVNKubernetesFeature.EnableMultiNetwork = true - } + setupConfig(netInfo, testConfig, config.GatewayModeShared) + config.OVNKubernetesFeature.EnableMultiNetwork = true app.Action = func(*cli.Context) error { netConf := netInfo.netconf() networkConfig, err := util.NewNetInfo(netConf) @@ -395,7 +390,7 @@ var _ = Describe("OVN Multi-Homed 
pod operations for layer 2 network", func() { gwConfig, err := util.ParseNodeL3GatewayAnnotation(testNode) Expect(err).NotTo(HaveOccurred()) Expect(gwConfig.NextHops).NotTo(BeEmpty()) - nbZone := &nbdb.NBGlobal{Name: ovntypes.OvnDefaultZone, UUID: ovntypes.OvnDefaultZone} + nbZone := &nbdb.NBGlobal{Name: config.Default.Zone, UUID: config.Default.Zone} n := newNamespace(ns) if netInfo.isPrimary { @@ -488,6 +483,95 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { ), ) + It("primary layer 2 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + setupConfig(dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16"), testConfiguration{}, config.GatewayModeShared) + app.Action = func(ctx *cli.Context) error { + netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") + netConf := netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(ovntypes.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + fakeNetworkManager := &testnm.FakeNetworkManager{ + PrimaryNetworks: map[string]util.NetInfo{}, + } + fakeNetworkManager.PrimaryNetworks[ns] = mutableNetInfo + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + nbZone := &nbdb.NBGlobal{Name: config.Default.Zone, UUID: config.Default.Zone} + + // Minimal initialDB: no UDN entities. init() + watchers create them. + initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + Expect(fakeOvn.networkManager.Start()).To(Succeed()) + defer fakeOvn.networkManager.Stop() + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities, then watchers so node sync creates per-node entities. + l2Controller, ok := fakeOvn.fullL2UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l2Controller.init()).To(Succeed()) + udnNetController, ok := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + udnNetController.bnc.ovnClusterLRPToJoinIfAddrs = dummyJoinIPs() + Expect(l2Controller.WatchNodes()).To(Succeed()) + Expect(l2Controller.WatchPods()).To(Succeed()) + Expect(l2Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create the Layer2 switch. + udnLSName := l2Controller.GetNetworkScopedSwitchName(ovntypes.OVNLayer2Switch) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.Name == udnLSName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Assert gateway router was created before cleanup. 
+ udnGWRouterName := l2Controller.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == udnGWRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Dummy controller with InvalidID runs Cleanup() to remove all entities for this network. + dummyController, err := NewLayer2UserDefinedNetworkController( + &l2Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeOvn.networkManager.Interface(), + nil, + NewPortCache(ctx.Done()), + nil, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(generateUDNPostInitDB([]libovsdbtest.TestData{nbZone}))) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + It("controller should cleanup stale nodes on startup", func() { app.Action = func(*cli.Context) error { netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") @@ -1040,6 +1124,7 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC } if staleNode { staleNodeName := "stale-node" + staleNodeChassisID := chassisIDForNode("stale-node") // create remote router port remoteRouterName := fmt.Sprintf("GR_%s_%s", netInfo.GetNetworkName(), staleNodeName) remotePortName := fmt.Sprintf("%s%s", ovntypes.TransitRouterToRouterPrefix, remoteRouterName) @@ -1054,7 +1139,7 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC MAC: util.IPAddrToHWAddr(remoteTRInfo.transitRouterNets[0].IP).String(), Options: map[string]string{ libovsdbops.RequestedTnlKey: "15", // as defined by getTransitRouterPortTunnelKey(nodeID) - libovsdbops.RequestedChassis: staleNodeName}, + libovsdbops.RequestedChassis: staleNodeChassisID}, ExternalIDs: externalIDs, } expectedEntities = append(expectedEntities, remotePort) diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index ff9da8b800..a1654d3244 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -385,6 +385,11 @@ func NewLayer3UserDefinedNetworkController( gatewayManagers: sync.Map{}, eIPController: eIPController, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneic.NewZoneInterconnectHandler(oc.GetNetInfo(), cnci.nbClient, cnci.sbClient, cnci.watchFactory) @@ -518,6 +523,19 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { // Note : Cluster manager removes the subnet annotation for the node. netName := oc.GetNetworkName() klog.Infof("Delete OVN logical entities for %s network controller of network %s", types.Layer3Topology, netName) + + // For primary L3 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. 
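The cleanupGatewayRoutersForNetworkFromDB helper called below is not shown in this diff. What follows is a rough sketch of the DB-driven discovery it implies, assuming the FindLogicalRoutersWithPredicate helper used by the tests above and the NewGatewayManagerForCleanup constructor added in gateway.go; the predicate and the layer2UseTransitRouter argument are guesses, not the actual implementation.

// Sketch only: discover this network's gateway routers purely from the NB DB,
// so cleanup works even when the corresponding Node objects are already gone.
func cleanupGatewayRoutersForNetworkFromDBSketch(nbClient libovsdbclient.Client, netInfo util.NetInfo,
	clusterRouterName, joinSwitchName string) error {
	networkName := netInfo.GetNetworkName()
	routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, func(lr *nbdb.LogicalRouter) bool {
		return lr.ExternalIDs[types.NetworkExternalID] == networkName &&
			strings.HasPrefix(lr.Name, types.GWRouterPrefix)
	})
	if err != nil {
		return fmt.Errorf("failed to list gateway routers of network %s: %w", networkName, err)
	}
	for _, lr := range routers {
		// Gateway router names encode the node name; the master.go hunk further
		// down uses the same helper to recover it.
		nodeName := util.GetWorkerFromGatewayRouter(lr.Name)
		gwManager := NewGatewayManagerForCleanup(nbClient, netInfo,
			clusterRouterName, joinSwitchName, lr.Name, nodeName,
			netInfo.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter)
		if err := gwManager.Cleanup(); err != nil {
			return fmt.Errorf("failed to cleanup gateway router %s: %w", lr.Name, err)
		}
	}
	return nil
}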
+ if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", netName, err) + } + } + + // Node switches (which hold management port LSPs) are deleted below; LSPs are cascade-deleted with the logical switch. // first delete node logical switches ops, err = libovsdbops.DeleteLogicalSwitchesWithPredicateOps(oc.nbClient, ops, func(item *nbdb.LogicalSwitch) bool { @@ -557,6 +575,16 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { return err } + // Delete QoS rows for this network (e.g. from NetworkQoS controller). Applies to primary and + // secondary Layer3 UDNs when EnableNetworkQoS is set. + ops, err = libovsdbops.DeleteQoSesWithPredicateOps(oc.nbClient, ops, + func(item *nbdb.QoS) bool { + return item.ExternalIDs[types.NetworkExternalID] == netName + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting QoSes of network %s: %v", netName, err) + } + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) if err != nil { return fmt.Errorf("failed to deleting routers/switches of network %s: %v", netName, err) @@ -569,13 +597,8 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } @@ -639,11 +662,11 @@ func (oc *Layer3UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } klog.Infof("Completing all the Watchers for network %s took %v", oc.GetNetworkName(), time.Since(start)) @@ -1048,8 +1071,8 @@ func (oc *Layer3UserDefinedNetworkController) syncNodes(nodes []interface{}) err } if config.OVNKubernetesFeature.EnableInterconnect { - if err := oc.zoneICHandler.SyncNodes(activeNodes); err != nil { - return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + if err := oc.zoneICHandler.CleanupStaleNodes(activeNodes); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup stale nodes: error: %w", err) } } diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go index 52c3aab0d6..caea5164a9 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go @@ -253,7 +253,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { fakeOvn, []testPod{podInfo}, expectationOptions..., - 
).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...))) + ).expectedLogicalSwitchesAndPorts()...))) return nil } @@ -459,6 +459,112 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { }), ), ) + + It("primary Layer 3 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + netInfo := dummyPrimaryLayer3UserDefinedNetwork("192.168.0.0/16", "192.168.1.0/24") + app.Action = func(ctx *cli.Context) error { + netConf := netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + // For cleanup we use a copy with InvalidID so the dummy controller treats the network as stale. + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(types.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + // Dummy controller only runs Cleanup(), which does not use the network manager; empty fake is enough. + fakeNetworkManager := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + + // NB_Global with default zone so GetNBZone returns it; node without zone annotation is treated as local. + nbZone := &nbdb.NBGlobal{Name: types.OvnDefaultZone, UUID: types.OvnDefaultZone} + // Post-cleanup DB: default net node switch + NB_Global + global entities (Copp, meters) as in Layer2 test. + defaultNetExpectations := generateUDNPostInitDB(append(emptyDefaultClusterNetworkNodeSwitch(nodeName), nbZone)) + + // Minimal initialDB: default net node switch, no UDN entities. The UDN controller's Start() + // runs init() which creates cluster router and join switch; then node sync creates per-node entities. + initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + // Mock ovn-nbctl list Load_Balancer_Group (used by UDN controller init; default controller init is not run in this test). + fexec := util.GetExec().(*testing.FakeExec) + fexec.AddFakeCmdsNoOutputNoError([]string{ + "ovn-nbctl --timeout=15 --columns=_uuid list Load_Balancer_Group", + }) + + // networkManager is already started by startWithDBSetup (via init()) and stopped by AfterEach (shutdown). + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities (cluster router, join switch, LB groups, etc.), + // then start watchers so node sync creates per-node entities (node LS, GW router, etc.). 
+ l3Controller, ok := fakeOvn.fullL3UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l3Controller.init()).To(Succeed()) + Expect(l3Controller.WatchNodes()).To(Succeed()) + Expect(l3Controller.WatchPods()).To(Succeed()) + Expect(l3Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create UDN entities: assert any switches and routers exist with this network's external-ids, + // and that the gateway router for this node exists. + networkName := networkConfig.GetNetworkName() + gwRouterName := networkConfig.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.ExternalIDs != nil && ls.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty(), "at least one LogicalSwitch for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs != nil && lr.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "at least one LogicalRouter for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == gwRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "gateway router %q should exist", gwRouterName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Do NOT delete the NAD. Simulate CleanupStaleNetworks(no valid networks): dummy controller + // with InvalidID runs Cleanup() so our network is treated as stale and all its entities are removed. 
+ dummyController, err := NewLayer3UserDefinedNetworkController( + &l3Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeNetworkManager, + nil, + nil, + NewPortCache(ctx.Done()), + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(defaultNetExpectations)) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + Describe("Dynamic UDN allocation with remote node", func() { It("activates a remote node when a NAD becomes active and cleans it up when inactive", func() { Expect(config.PrepareTestConfig()).To(Succeed()) @@ -841,10 +947,6 @@ func makeCUDNOwnerRef(name string) metav1.OwnerReference { } } -func (sni *userDefinedNetInfo) getNetworkRole() string { - return util.GetUserDefinedNetworkRole(sni.isPrimary) -} - func getNetworkRole(netInfo util.NetInfo) string { return util.GetUserDefinedNetworkRole(netInfo.IsPrimaryNetwork()) } @@ -855,10 +957,7 @@ func (sni *userDefinedNetInfo) setupOVNDependencies(dbData *libovsdbtest.TestSet return err } - externalIDs := map[string]string{ - types.NetworkExternalID: sni.netName, - types.NetworkRoleExternalID: sni.getNetworkRole(), - } + externalIDs := util.GenerateExternalIDsForSwitchOrRouter(netInfo) switch sni.topology { case types.Layer2Topology: dbData.NBData = append(dbData.NBData, &nbdb.LogicalSwitch{ @@ -1010,7 +1109,7 @@ func newNodeWithUserDefinedNetworks(nodeName string, nodeIPv4CIDR string, netInf util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4CIDR), "k8s.ovn.org/zone-name": zone, "k8s.ovn.org/l3-gateway-config": fmt.Sprintf("{\"default\":{\"mode\":\"shared\",\"bridge-id\":\"breth0\",\"interface-id\":\"breth0_ovn-worker\",\"mac-address\":%q,\"ip-addresses\":[%[2]q],\"ip-address\":%[2]q,\"next-hops\":[%[3]q],\"next-hop\":%[3]q,\"node-port-enable\":\"true\",\"vlan-id\":\"0\"}}", util.IPAddrToHWAddr(nodeIP), nodeCIDR, nextHopIP), - util.OvnNodeChassisID: "abdcef", + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/network-ids": fmt.Sprintf("{\"default\":\"0\",\"isolatednet\":\"%s\"}", userDefinedNetworkID), util.OvnNodeID: "4", "k8s.ovn.org/udn-layer2-node-gateway-router-lrp-tunnel-ids": "{\"isolatednet\":\"25\"}", diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 3fe803660e..d969e73d17 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -276,17 +276,17 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { return } - localZoneNodeNames := make([]string, 0, len(kNodes)) - remoteZoneNodeNames := make([]string, 0, len(kNodes)) + localZoneNodes := make([]*corev1.Node, 0, len(kNodes)) + remoteZoneNodes := make([]*corev1.Node, 0, len(kNodes)) for i := range kNodes { if oc.isLocalZoneNode(kNodes[i]) { - localZoneNodeNames = append(localZoneNodeNames, kNodes[i].Name) + localZoneNodes = append(localZoneNodes, kNodes[i]) } else { - remoteZoneNodeNames = append(remoteZoneNodeNames, kNodes[i].Name) + remoteZoneNodes = append(remoteZoneNodes, kNodes[i]) } } - if err := oc.syncChassis(localZoneNodeNames, remoteZoneNodeNames); err != nil { + if err := oc.syncChassis(localZoneNodes, remoteZoneNodes); err != nil { klog.Errorf("Failed to sync chassis: error: %v", err) } } @@ -297,8 +297,8 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { // do not want to delete. 
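// The syncNodes hunks below gate interconnect handling on the controller's
// transport mode. Condensed to its essentials (method names are the ones this
// change introduces; error wrapping omitted):
//
//	if config.OVNKubernetesFeature.EnableInterconnect {
//		// chassis sync always runs: UDNs may still use overlay transport
//		if err := oc.zoneChassisHandler.SyncNodes(kNodes); err != nil {
//			return err
//		}
//		if oc.Transport() == types.NetworkTransportNoOverlay {
//			return oc.zoneICHandler.Cleanup() // default network needs no IC resources
//		}
//		return oc.zoneICHandler.CleanupStaleNodes(kNodes) // overlay: drop stale nodes only
//	}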
func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { foundNodes := sets.New[string]() - localZoneNodeNames := make([]string, 0, len(kNodes)) - remoteZoneKNodeNames := make([]string, 0, len(kNodes)) + localZoneNodes := make([]*corev1.Node, 0, len(kNodes)) + remoteZoneNodes := make([]*corev1.Node, 0, len(kNodes)) for _, tmp := range kNodes { node, ok := tmp.(*corev1.Node) if !ok { @@ -313,9 +313,9 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if oc.isLocalZoneNode(node) { foundNodes.Insert(node.Name) oc.localZoneNodes.Store(node.Name, true) - localZoneNodeNames = append(localZoneNodeNames, node.Name) + localZoneNodes = append(localZoneNodes, node) } else { - remoteZoneKNodeNames = append(remoteZoneKNodeNames, node.Name) + remoteZoneNodes = append(remoteZoneNodes, node) } } @@ -359,7 +359,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if ok { return false } - nodeName := strings.TrimPrefix(item.Name, types.GWRouterPrefix) + nodeName := util.GetWorkerFromGatewayRouter(item.Name) if nodeName != item.Name && len(nodeName) > 0 && !foundNodes.Has(nodeName) { staleSwitches.Insert(nodeName) return true @@ -378,17 +378,28 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { } } - if err := oc.syncChassis(localZoneNodeNames, remoteZoneKNodeNames); err != nil { + if err := oc.syncChassis(localZoneNodes, remoteZoneNodes); err != nil { return fmt.Errorf("failed to sync chassis: error: %v", err) } if config.OVNKubernetesFeature.EnableInterconnect { + // Chassis cleanup should happen regardless of transport mode to cleanup + // any stale remote chassis entries (e.g., from overlay->no-overlay migration) if err := oc.zoneChassisHandler.SyncNodes(kNodes); err != nil { return fmt.Errorf("zoneChassisHandler failed to sync nodes: error: %w", err) } - if err := oc.zoneICHandler.SyncNodes(kNodes); err != nil { - return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + // Interconnect resource sync depends on transport mode: + // - For overlay: ensure transit switch exists and cleanup stale resources + // - For no-overlay: cleanup all interconnect resources (nodes and transit switch) + if oc.Transport() == types.NetworkTransportNoOverlay { + if err := oc.zoneICHandler.Cleanup(); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup: error: %w", err) + } + } else { + if err := oc.zoneICHandler.CleanupStaleNodes(kNodes); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup stale nodes: error: %w", err) + } } } @@ -397,7 +408,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { // Cleanup stale chassis and chassis template variables with no // corresponding nodes. 
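// syncChassis below switches stale-chassis detection from the chassis Hostname
// to the chassis ID carried in each node's k8s.ovn.org/node-chassis-id
// annotation. The matching rule in isolation (a self-contained sketch;
// staleChassis and its parameters are illustrative names):
//
//	// chassisByID holds chassis rows keyed by Chassis.Name (the chassis ID);
//	// claimedIDs holds the IDs parsed from node annotations.
//	func staleChassis(chassisByID map[string]*sbdb.Chassis, claimedIDs []string) []*sbdb.Chassis {
//		claimed := make(map[string]bool, len(claimedIDs))
//		for _, id := range claimedIDs {
//			claimed[id] = true
//		}
//		var stale []*sbdb.Chassis
//		for id, ch := range chassisByID {
//			if !claimed[id] { // no node claims this chassis ID
//				stale = append(stale, ch)
//			}
//		}
//		return stale
//	}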
-func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNodeNames []string) error { +func (oc *DefaultNetworkController) syncChassis(localZoneNodes, remoteZoneNodes []*corev1.Node) error { chassisList, err := libovsdbops.ListChassis(oc.sbClient) if err != nil { return fmt.Errorf("failed to get chassis list: error: %v", err) @@ -418,10 +429,8 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo } } - chassisHostNameMap := map[string]*sbdb.Chassis{} chassisNameMap := map[string]*sbdb.Chassis{} for _, chassis := range chassisList { - chassisHostNameMap[chassis.Hostname] = chassis chassisNameMap[chassis.Name] = chassis } @@ -443,26 +452,33 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo // Delete existing nodes from the chassis map. // Also delete existing templateVars from the template map. - for _, nodeName := range localZoneNodeNames { - if chassis, ok := chassisHostNameMap[nodeName]; ok { - delete(chassisNameMap, chassis.Name) - delete(chassisHostNameMap, chassis.Hostname) - delete(templateChassisMap, chassis.Name) + for _, node := range localZoneNodes { + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + klog.Warningf("Unable to parse local node %s chassis-id annotation. Chassis may be removed during sync", + node.Name) + continue } + delete(chassisNameMap, chassisID) + delete(templateChassisMap, chassisID) } // Delete existing remote zone nodes from the chassis map, but not from the templateVars // as we need to cleanup chassisTemplateVars for the remote zone nodes - for _, nodeName := range remoteZoneNodeNames { - if chassis, ok := chassisHostNameMap[nodeName]; ok { - delete(chassisNameMap, chassis.Name) - delete(chassisHostNameMap, chassis.Hostname) + for _, node := range remoteZoneNodes { + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + klog.Warningf("Unable to parse remote node %s chassis-id annotation. 
Chassis may be removed during sync", + node.Name) + continue } + delete(chassisNameMap, chassisID) } - staleChassis := make([]*sbdb.Chassis, 0, len(chassisHostNameMap)) - for _, chassis := range chassisNameMap { + staleChassis := make([]*sbdb.Chassis, 0, len(chassisNameMap)) + for name, chassis := range chassisNameMap { staleChassis = append(staleChassis, chassis) + klog.Infof("Removing stale chassis with ID/Name: %s, hostname: %s", name, chassis.Hostname) } staleChassisTemplateVars := make([]*nbdb.ChassisTemplateVar, 0, len(templateChassisMap)) @@ -471,11 +487,11 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo } if err := libovsdbops.DeleteChassis(oc.sbClient, staleChassis...); err != nil { - return fmt.Errorf("failed Deleting chassis %v error: %v", chassisHostNameMap, err) + return fmt.Errorf("failed Deleting chassis %#v error: %v", chassisNameMap, err) } if err := libovsdbops.DeleteChassisTemplateVar(oc.nbClient, staleChassisTemplateVars...); err != nil { - return fmt.Errorf("failed Deleting chassis template vars %v error: %v", chassisHostNameMap, err) + return fmt.Errorf("failed Deleting chassis template vars %#v error: %v", staleChassisTemplateVars, err) } return nil @@ -638,21 +654,33 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *corev1.Node, n } if nSyncs.syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { - // Call zone chassis handler's AddLocalZoneNode function to mark + // Always call zone chassis handler's AddLocalZoneNode function to mark // this node's chassis record in Southbound db as a local zone chassis. - // This is required when a node moves from a remote zone to local zone + // This is required even when the default network uses no-overlay transport, + // because user-defined networks may still use overlay transport and require + // the chassis entries for their transit switch connectivity. + chassisFailed := false if err := oc.zoneChassisHandler.AddLocalZoneNode(node); err != nil { errs = append(errs, err) oc.syncZoneICFailed.Store(node.Name, true) - } else { + chassisFailed = true + } + + // For no-overlay transport, the default network's interconnect resources are not needed. + // The transit switch and its resources are cleaned up during sync, so we only need + // to create IC resources for overlay transport. + if oc.Transport() != types.NetworkTransportNoOverlay { // Call zone IC handler's AddLocalZoneNode function to create // interconnect resources in the OVN Northbound db for this local zone node. if err := oc.zoneICHandler.AddLocalZoneNode(node); err != nil { errs = append(errs, err) oc.syncZoneICFailed.Store(node.Name, true) - } else { + } else if !chassisFailed { oc.syncZoneICFailed.Delete(node.Name) } + } else if !chassisFailed { + // In no-overlay mode, if chassis handler succeeded, clear the failed state + oc.syncZoneICFailed.Delete(node.Name) } } @@ -680,25 +708,34 @@ func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *corev1.Node, var err error if syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { - // Call zone chassis handler's AddRemoteZoneNode function to creates - // the remote chassis for the remote zone node in the SB DB or mark - // the entry as remote if it was local chassis earlier + // Always create remote chassis entry with geneve encapsulation. 
+ // This is needed even when the default network uses no-overlay transport, + // because user-defined networks may still use overlay transport and require + // the remote chassis entries for their transit switch connectivity. if err = oc.zoneChassisHandler.AddRemoteZoneNode(node); err != nil { err = fmt.Errorf("adding or updating remote node chassis %s failed, err - %w", node.Name, err) oc.syncZoneICFailed.Store(node.Name, true) return err } - // Call zone IC handler's AddRemoteZoneNode function to create - // interconnect resources in the OVN NBDB for this remote zone node. - // Also, create the remote port binding in SBDB - if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { - err = fmt.Errorf("adding or updating remote node IC resources %s failed, err - %w", node.Name, err) - oc.syncZoneICFailed.Store(node.Name, true) + // For no-overlay transport, the default network's interconnect resources are not needed. + // The transit switch and its resources are cleaned up during sync, so we only need + // to create IC resources for overlay transport. + if oc.Transport() != types.NetworkTransportNoOverlay { + // Call zone IC handler's AddRemoteZoneNode function to create + // interconnect resources in the OVN NBDB for this remote zone node. + // Also, create the remote port binding in SBDB + if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { + err = fmt.Errorf("adding or updating remote node IC resources %s failed, err - %w", node.Name, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + klog.V(5).Infof("Creating Interconnect resources for remote node %q on network %q took: %s", node.Name, oc.GetNetworkName(), time.Since(start)) } else { + // In no-overlay mode, if chassis handler succeeded, clear the failed state oc.syncZoneICFailed.Delete(node.Name) } - klog.V(5).Infof("Creating Interconnect resources for remote node %q on network %q took: %s", node.Name, oc.GetNetworkName(), time.Since(start)) } return err } diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index 8de1687ee3..f040f1a8bb 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -1731,7 +1731,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { Name: "newNode", Annotations: map[string]string{ "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"fd02:0:0:2::2895/64\"]}", newNodeSubnet), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("newNode"), util.OvnNodeID: "2", }, }, @@ -1793,7 +1793,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { Name: "newNode", Annotations: map[string]string{ "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", newNodeIpv4Subnet), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("newNode"), "k8s.ovn.org/node-gateway-router-lrp-ifaddr": "{\"ipv4\":\"100.64.0.2/16\"}", }, }, @@ -1906,7 +1906,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { newNodeSubnet := "10.1.2.0/24" transitSwitchSubnet := "100.88.0.3/16" testNode.Annotations["k8s.ovn.org/node-subnets"] = fmt.Sprintf("{\"default\":[\"%s\"]}", newNodeSubnet) - testNode.Annotations["k8s.ovn.org/node-chassis-id"] = "2" + testNode.Annotations["k8s.ovn.org/node-chassis-id"] = chassisIDForNode(testNode.Name) testNode.Annotations["k8s.ovn.org/node-transit-switch-port-ifaddr"] = fmt.Sprintf("{\"ipv4\":\"%s\"}", 
transitSwitchSubnet) testNode.Annotations["k8s.ovn.org/zone-name"] = "foo" updatedNode, err := fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &testNode, metav1.CreateOptions{}) @@ -2135,15 +2135,15 @@ func TestController_syncNodes(t *testing.T) { { name: "removes stale chassis and chassis private", initialSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1", Hostname: node1Name}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, - &sbdb.Chassis{Name: "chassis-node2", Hostname: nodeRmName}, - &sbdb.ChassisPrivate{Name: "chassis-node2"}, - &sbdb.ChassisPrivate{Name: "chassis-node3"}, + &sbdb.Chassis{Name: chassisIDForNode(node1Name), Hostname: node1Name}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(node1Name)}, + &sbdb.Chassis{Name: chassisIDForNode(nodeRmName), Hostname: nodeRmName}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(nodeRmName)}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node3")}, }, expectedSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1", Hostname: node1Name}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, + &sbdb.Chassis{Name: chassisIDForNode(node1Name), Hostname: node1Name}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(node1Name)}, }, }, } @@ -2159,6 +2159,9 @@ func TestController_syncNodes(t *testing.T) { testNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node1", + Annotations: map[string]string{ + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node1Name), + }, }, } @@ -2243,20 +2246,20 @@ func TestController_deleteStaleNodeChassis(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - "k8s.ovn.org/node-chassis-id": "chassis-node1-dpu", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("node1-dpu"), }, }, }, name: "removes stale chassis when ovn running on DPU", initialSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1-dpu", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1-dpu"}, - &sbdb.Chassis{Name: "chassis-node1", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, + &sbdb.Chassis{Name: chassisIDForNode("node1-dpu"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1-dpu")}, + &sbdb.Chassis{Name: chassisIDForNode("node1"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1")}, }, expectedSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1-dpu", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1-dpu"}, + &sbdb.Chassis{Name: chassisIDForNode("node1-dpu"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1-dpu")}, }, }, } diff --git a/go-controller/pkg/ovn/multicast_test.go b/go-controller/pkg/ovn/multicast_test.go index 0c80906feb..b951fc0957 100644 --- a/go-controller/pkg/ovn/multicast_test.go +++ b/go-controller/pkg/ovn/multicast_test.go @@ -264,7 +264,7 @@ func newNodeWithNad(nad *nadapi.NetworkAttachmentDefinition, networkName, networ n.Annotations["k8s.ovn.org/node-subnets"] = fmt.Sprintf("{\"default\":\"192.168.126.202/24\", \"%s\":\"192.168.127.202/24\"}", networkName) n.Annotations["k8s.ovn.org/network-ids"] = fmt.Sprintf("{\"default\":\"0\",\"%s\":\"%s\"}", networkName, networkID) n.Annotations["k8s.ovn.org/node-mgmt-port-mac-addresses"] = fmt.Sprintf("{\"default\":\"96:8f:e8:25:a2:e5\",\"%s\":\"d6:bc:85:32:30:fb\"}", networkName) - n.Annotations["k8s.ovn.org/node-chassis-id"] = "abdcef" + n.Annotations["k8s.ovn.org/node-chassis-id"] = chassisIDForNode(n.Name) n.Annotations["k8s.ovn.org/l3-gateway-config"] 
= "{\"default\":{\"mac-address\":\"52:54:00:e2:ed:d0\",\"ip-addresses\":[\"10.1.1.10/24\"],\"ip-address\":\"10.1.1.10/24\",\"next-hops\":[\"10.1.1.1\"],\"next-hop\":\"10.1.1.1\"}}" n.Annotations[util.OvnNodeID] = "4" } diff --git a/go-controller/pkg/ovn/multihoming_test.go b/go-controller/pkg/ovn/multihoming_test.go index e41593dd77..3743df4e03 100644 --- a/go-controller/pkg/ovn/multihoming_test.go +++ b/go-controller/pkg/ovn/multihoming_test.go @@ -118,11 +118,11 @@ func withClusterPortGroup() option { } } -func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPorts(isPrimary bool) []libovsdbtest.TestData { - return em.expectedLogicalSwitchesAndPortsWithLspEnabled(isPrimary, nil) +func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPorts() []libovsdbtest.TestData { + return em.expectedLogicalSwitchesAndPortsWithLspEnabled(nil) } -func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsWithLspEnabled(isPrimary bool, expectedPodLspEnabled map[string]*bool) []libovsdbtest.TestData { +func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsWithLspEnabled(expectedPodLspEnabled map[string]*bool) []libovsdbtest.TestData { data := []libovsdbtest.TestData{} for _, ocInfo := range em.fakeOvn.userDefinedNetworkControllers { nodeslsps := make(map[string][]string) @@ -260,10 +260,8 @@ func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsW UUID: switchName + "-UUID", Name: switchName, Ports: nodeslsps[switchName], - ExternalIDs: map[string]string{ - ovntypes.NetworkExternalID: ocInfo.bnc.GetNetworkName(), - ovntypes.NetworkRoleExternalID: util.GetUserDefinedNetworkRole(isPrimary), - }, + + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(ocInfo.bnc), OtherConfig: otherConfig, ACLs: acls[switchName], } @@ -331,7 +329,7 @@ func newExpectedSwitchPort(lspUUID string, portName string, podAddr string, pod ovntypes.TopologyExternalID: netInfo.TopologyType(), }, Options: map[string]string{ - libovsdbops.RequestedChassis: pod.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(pod), "iface-id-ver": pod.podName, }, PortSecurity: []string{podAddr}, diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 0f41bc4e95..5dfc0f59dc 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -152,7 +152,7 @@ func getExpectedDataPodsAndSwitchesForUserDefinedNetwork(fakeOvn *FakeOVN, pods ovntypes.TopologyExternalID: ocInfo.bnc.TopologyType(), }, Options: map[string]string{ - libovsdbops.RequestedChassis: pod.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(pod), "iface-id-ver": pod.podName, }, @@ -454,91 +454,96 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("correctly creates and deletes network policy and multi network policy with the same policy", func() { - app.Action = func(*cli.Context) error { - var err error - - topology := ovntypes.Layer2Topology - subnets := "10.1.0.0/24" - setUserDefinedNetworkTestData(topology, subnets) - - namespace1 := *newNamespace(namespaceName1) - nPodTest := getTestPod(namespace1.Name, nodeName) - nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) - networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - - watchNodes := false - node := 
*newNode(nodeName, "192.168.126.202/24") + ginkgo.DescribeTable("correctly creates and deletes network policy and multi network policy with the same policy", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + var err error - startOvn(initialDB, watchNodes, []corev1.Node{node}, []corev1.Namespace{namespace1}, nil, nil, - []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + topology := ovntypes.Layer2Topology + subnets := "10.1.0.0/24" + setUserDefinedNetworkTestData(topology, subnets) - ginkgo.By("Creating networkPolicy applied to the pod") - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + namespace1 := *newNamespace(namespaceName1) + nPodTest := getTestPod(namespace1.Name, nodeName) + nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) + networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + watchNodes := false + node := *newNode(nodeName, "192.168.126.202/24") - dataParams := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(nPodTest.portUUID). - withTCPPeerPorts(portNum) - gressPolicyExpectedData1 := getPolicyData(dataParams) - defaultDenyExpectedData1 := getDefaultDenyData(dataParams) - initData := getUpdatedInitialDB([]testPod{nPodTest}) - expectedData1 := append(initData, gressPolicyExpectedData1...) - expectedData1 = append(expectedData1, defaultDenyExpectedData1...) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + startOvn(initialDB, watchNodes, []corev1.Node{node}, []corev1.Namespace{namespace1}, nil, nil, + []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) - ginkgo.By("Creating multi-networkPolicy applied to the pod") - mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) - mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + ginkgo.By("Creating networkPolicy applied to the pod") + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Create(context.TODO(), mpolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + + dataParams := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(nPodTest.portUUID). 
+ withTCPPeerPorts(portNum) + gressPolicyExpectedData1 := getPolicyData(dataParams) + defaultDenyExpectedData1 := getDefaultDenyData(dataParams) + initData := getUpdatedInitialDB([]testPod{nPodTest}) + expectedData1 := append(initData, gressPolicyExpectedData1...) + expectedData1 = append(expectedData1, defaultDenyExpectedData1...) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + + ginkgo.By("Creating multi-networkPolicy applied to the pod") + mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) + mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Create(context.TODO(), mpolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] - portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) - gomega.Expect(portInfo).NotTo(gomega.BeNil()) - ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) + gomega.Expect(portInfo).NotTo(gomega.BeNil()) + ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + + dataParams2 := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(portInfo.portUUID). + withTCPPeerPorts(portNum). + withNetInfo(netInfo) + gressPolicyExpectedData2 := getPolicyData(dataParams2) + defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) + expectedData2 := append(expectedData1, gressPolicyExpectedData2...) + expectedData2 = append(expectedData2, defaultDenyExpectedData2...) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + + // Delete the multi network policy + ginkgo.By("Deleting the multi network policy") + err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) - dataParams2 := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(portInfo.portUUID). - withTCPPeerPorts(portNum). - withNetInfo(netInfo) - gressPolicyExpectedData2 := getPolicyData(dataParams2) - defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) - expectedData2 := append(expectedData1, gressPolicyExpectedData2...) - expectedData2 = append(expectedData2, defaultDenyExpectedData2...) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + ginkgo.By("Deleting the network policy") + err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Delete the multi network policy - ginkgo.By("Deleting the multi network policy") - err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) + return nil + } - ginkgo.By("Deleting the network policy") - err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.DescribeTable("correctly adds and deletes pod IPs from secondary network namespace address set", func(topology string, remote bool) { @@ -571,7 +576,6 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { if remote { config.OVNKubernetesFeature.EnableInterconnect = true node.Annotations["k8s.ovn.org/zone-name"] = "remote" - node.Annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" node.Annotations, err = util.UpdateNetworkIDAnnotation(node.Annotations, ovntypes.DefaultNetworkName, 0) gomega.Expect(err).NotTo(gomega.HaveOccurred()) if topology != ovntypes.LocalnetTopology { diff --git a/go-controller/pkg/ovn/network_segmentation_test.go b/go-controller/pkg/ovn/network_segmentation_test.go index cfcc0f7e83..97e48ec1b9 100644 --- a/go-controller/pkg/ovn/network_segmentation_test.go +++ b/go-controller/pkg/ovn/network_segmentation_test.go @@ -85,7 +85,7 @@ var _ = ginkgo.Describe("OVN Pod Operations with network segmentation", func() { }, Options: map[string]string{ // check requested-chassis will be updated to correct t1.nodeName value - libovsdbops.RequestedChassis: t1.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(t1), // check old value for iface-id-ver will be updated to pod.UID "iface-id-ver": "wrong_value", }, diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index c137141129..52bbbf5956 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -39,6 +39,7 @@ import ( egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" udnclientfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" @@ -179,6 +180,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { IPAMClaimsClient: fakeipamclaimclient.NewSimpleClientset(ipamClaimObjects...), NetworkAttchDefClient: 
nadClient, UserDefinedNetworkClient: udnclientfake.NewSimpleClientset(), + VTEPClient: vtepfake.NewSimpleClientset(), } o.init(nads) } diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index e877cb9af6..e43a2cb31f 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -379,6 +379,9 @@ func (oc *DefaultNetworkController) addLogicalPort(pod *corev1.Pod) (err error) // Add the pod's logical switch port to the port cache _ = oc.logicalPortCache.add(pod, switchName, types.DefaultNetworkName, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if oc.onLogicalPortCacheAdd != nil { + oc.onLogicalPortCacheAdd(pod, types.DefaultNetworkName) + } if kubevirt.IsPodLiveMigratable(pod) { if err := oc.ensureDHCP(pod, podAnnotation, lsp); err != nil { diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index c5ed638bc4..437e15bee3 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/google/uuid" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" "github.com/urfave/cli/v2" @@ -142,6 +143,7 @@ func newNode(nodeName, nodeIPv4CIDR string) *corev1.Node { "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4CIDR, ""), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4CIDR), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, Labels: map[string]string{ @@ -167,6 +169,7 @@ func newNodeGlobalZoneNotEgressableV4Only(nodeName, nodeIPv4 string) *corev1.Nod "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, ""), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, }, @@ -189,6 +192,7 @@ func newNodeGlobalZoneNotEgressableV6Only(nodeName, nodeIPv6 string) *corev1.Nod "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", nodeIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv6), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, }, @@ -210,19 +214,20 @@ func newNodeGlobalZoneNotEgressableV6Only(nodeName, nodeIPv6 string) *corev1.Nod } type testPod struct { - portUUID string - nodeName string - nodeSubnet string - nodeMgtIP string - nodeGWIP string - podName string - podIP string - podMAC string - namespace string - portName string - routes []util.PodRoute - noIfaceIdVer bool - networkRole string + portUUID string + nodeName string + nodeChassisID string + nodeSubnet string + nodeMgtIP string + nodeGWIP string + podName string + podIP string + podMAC string + namespace string + portName string + routes []util.PodRoute + noIfaceIdVer bool + networkRole string udnPodInfos map[string]*udnPodInfo } @@ -245,21 +250,40 @@ type portInfo struct { prefixLen int } +func chassisIDForNode(nodeName string) string { + return uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() +} + +func requestedChassisForPod(pod testPod) string { + if pod.nodeChassisID != "" { + return pod.nodeChassisID + } + if pod.nodeName == "" { + return "" + } + return chassisIDForNode(pod.nodeName) +} + func newTPod(nodeName, nodeSubnet, 
nodeMgtIP, nodeGWIP, podName, podIPs, podMAC, namespace string) testPod {
 portName := util.GetLogicalPortName(namespace, podName)
+ nodeChassisID := ""
+ if nodeName != "" {
+ nodeChassisID = chassisIDForNode(nodeName)
+ }
 to := testPod{
- portUUID: portName + "-UUID",
- nodeSubnet: nodeSubnet,
- nodeMgtIP: nodeMgtIP,
- nodeGWIP: nodeGWIP,
- podIP: podIPs,
- podMAC: podMAC,
- portName: portName,
- nodeName: nodeName,
- podName: podName,
- namespace: namespace,
- udnPodInfos: map[string]*udnPodInfo{},
- networkRole: ovntypes.NetworkRolePrimary, // all tests here run with network-segmentation disabled by default by default
+ portUUID: portName + "-UUID",
+ nodeSubnet: nodeSubnet,
+ nodeMgtIP: nodeMgtIP,
+ nodeGWIP: nodeGWIP,
+ podIP: podIPs,
+ podMAC: podMAC,
+ portName: portName,
+ nodeName: nodeName,
+ nodeChassisID: nodeChassisID,
+ podName: podName,
+ namespace: namespace,
+ udnPodInfos: map[string]*udnPodInfo{},
+ networkRole: ovntypes.NetworkRolePrimary, // all tests here run with network-segmentation disabled by default
 }
 var routeSources []*net.IPNet
@@ -479,7 +503,7 @@ func getExpectedDataPodsSwitchesPortGroup(netInfo util.NetInfo, pods []testPod,
 "namespace": pod.namespace,
 },
 Options: map[string]string{
- libovsdbops.RequestedChassis: pod.nodeName,
+ libovsdbops.RequestedChassis: requestedChassisForPod(pod),
 "iface-id-ver": pod.podName,
 },
 PortSecurity: []string{podAddr},
@@ -2030,7 +2054,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() {
 },
 Options: map[string]string{
 // check requested-chassis will be updated to correct t1.nodeName value
- libovsdbops.RequestedChassis: t2.nodeName,
+ libovsdbops.RequestedChassis: requestedChassisForPod(t2),
 // check old value for iface-id-ver will be updated to pod.UID
 "iface-id-ver": "wrong_value",
 },
@@ -2045,7 +2069,7 @@
 "namespace": t2.namespace,
 },
 Options: map[string]string{
- libovsdbops.RequestedChassis: t2.nodeName,
+ libovsdbops.RequestedChassis: requestedChassisForPod(t2),
 //"iface-id-ver": is empty to check that it won't be set on update
 },
 PortSecurity: []string{fmt.Sprintf("%s %s", t2.podMAC, t2.podIP)},
@@ -2060,7 +2084,7 @@
 },
 Options: map[string]string{
 // check requested-chassis will be updated to correct t1.nodeName value
- libovsdbops.RequestedChassis: t3.nodeName,
+ libovsdbops.RequestedChassis: requestedChassisForPod(t3),
 // check old value for iface-id-ver will be updated to pod.UID
 "iface-id-ver": "wrong_value",
 },
@@ -2230,7 +2254,7 @@
 },
 Options: map[string]string{
 // check requested-chassis will be updated to correct t1.nodeName value
- libovsdbops.RequestedChassis: t1.nodeName,
+ libovsdbops.RequestedChassis: requestedChassisForPod(t1),
 // check old value for iface-id-ver will be updated to pod.UID
 "iface-id-ver": "wrong_value",
 },
diff --git a/go-controller/pkg/ovn/policy_stale_test.go b/go-controller/pkg/ovn/policy_stale_test.go
index c1bc791f14..5bb5ac44ca 100644
--- a/go-controller/pkg/ovn/policy_stale_test.go
+++ b/go-controller/pkg/ovn/policy_stale_test.go
@@ -3,6 +3,7 @@ package ovn
 import (
 "context"
 "fmt"
+ "strings"
 "github.com/onsi/ginkgo/v2"
 "github.com/onsi/gomega"
@@ -40,6 +41,9 @@ func getStaleDefaultDenyACL(netpolName, namespace, match string, deny, egress bo
 name := namespace + "_" + netpolName
 if !deny {
 aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, arpAllowACL)
+ if strings.Contains(match, 
"icmp") { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, icmpAllowACL) + } priority = types.DefaultAllowPriority action = nbdb.ACLActionAllow name = getStaleARPAllowACLName(namespace) @@ -67,40 +71,54 @@ func getStaleARPAllowACLName(ns string) string { // getStaleDefaultDenyData builds stale ACLs and port groups for given netpol func getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.TestData { + return getStaleDefaultDenyDataWithICMP(networkPolicy, config.OVNKubernetesFeature.AllowICMPNetworkPolicy) +} + +func getStaleDefaultDenyDataWithICMP(networkPolicy *knet.NetworkPolicy, includeICMP bool) []libovsdbtest.TestData { namespace := networkPolicy.Namespace netpolName := networkPolicy.Name fakeController := getFakeBaseController(&util.DefaultNetInfo{}) egressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLEgress) egressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName, true, true) - egressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + egressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + + testData := []libovsdbtest.TestData{egressDenyACL, egressARPAllowACL} + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if includeICMP { + egressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, false, true) + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) ingressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName, true, false) - ingressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + ingressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + testData = append(testData, ingressDenyACL, ingressARPAllowACL) + if includeICMP { + ingressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, false, false) + testData = append(testData, ingressICMPAllowACL) + ingressACLs = append(ingressACLs, ingressICMPAllowACL) + } egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), nil, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), nil, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } // getStalePolicyACLs builds stale ACLs for given peers @@ -250,14 +268,47 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { ginkgo.Context("on startup", func() { - ginkgo.It("reconciles an existing networkPolicy updating stale ACLs", func() { + 
ginkgo.DescribeTable("reconciles an existing networkPolicy updating stale ACLs", + func(allowICMPNetworkPolicy bool) { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + // start with stale ACLs + gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) + defaultDenyInitialData := getStaleDefaultDenyData(networkPolicy) + initialData := initialDB.NBData + initialData = append(initialData, gressPolicyInitialData...) + initialData = append(initialData, defaultDenyInitialData...) + startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1, namespace2}, + []knet.NetworkPolicy{*networkPolicy}) + + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName1) + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName2) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // make sure stale ACLs were updated + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) + + ginkgo.It("reconciles with allow ICMP network policy disabled and removes stale ICMP default deny ACLs", func() { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = false namespace1 := *newNamespace(namespaceName1) namespace2 := *newNamespace(namespaceName2) networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, namespace2.Name, "", true, true) - // start with stale ACLs + // start with stale ACLs containing ICMP allow ACLs from a previously enabled config gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) - defaultDenyInitialData := getStaleDefaultDenyData(networkPolicy) + defaultDenyInitialData := getStaleDefaultDenyDataWithICMP(networkPolicy, true) initialData := initialDB.NBData initialData = append(initialData, gressPolicyInitialData...) initialData = append(initialData, defaultDenyInitialData...) @@ -270,7 +321,7 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // make sure stale ACLs were updated + // make sure stale ICMP ACLs were removed to match disabled allow-icmp config expectedData := getNamespaceWithSinglePolicyExpectedData( newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), initialDB.NBData) diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index c02af4575f..1dfe375dc4 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -6,6 +6,7 @@ import ( "net" "runtime" "sort" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -107,7 +108,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * egressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, arpAllowACL) - egressAllowACL := libovsdbops.BuildACL( + egressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionFromLport, types.DefaultAllowPriority, @@ -122,7 +123,36 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * }, types.DefaultACLTier, ) - egressAllowACL.UUID = aclIDs.String() + "-UUID" + egressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + testData := []libovsdbtest.TestData{ + egressDenyACL, + egressARPAllowACL, + } + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, icmpAllowACL) + egressICMPAllowACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionFromLport, + types.DefaultAllowPriority, + "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + false, + aclIDs.GetExternalIDs(), + map[string]string{ + "apply-after-lb": "true", + }, + types.DefaultACLTier, + ) + egressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, defaultDenyACL) @@ -142,7 +172,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, arpAllowACL) - ingressAllowACL := libovsdbops.BuildACL( + ingressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionToLport, types.DefaultAllowPriority, @@ -155,7 +185,31 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * nil, types.DefaultACLTier, ) - ingressAllowACL.UUID = aclIDs.String() + "-UUID" + ingressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, icmpAllowACL) + ingressICMPAllowACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + false, + 
aclIDs.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + ingressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + ingressACLs = append(ingressACLs, ingressICMPAllowACL) + } + + for _, acl := range ingressACLs { + testData = append(testData, acl) + } lsps := []*nbdb.LogicalSwitchPort{} for _, uuid := range params.localPortUUIDs { @@ -166,10 +220,11 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * if policyTypeEgress { egressDenyPorts = lsps } + egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), egressDenyPorts, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" @@ -180,18 +235,11 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), ingressDenyPorts, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } func getDefaultDenyData(params *netpolDataParams) []libovsdbtest.TestData { @@ -291,10 +339,24 @@ func getGressACLs(gressIdx int, peers []knet.NetworkPolicyPeer, policyType knet. acl.UUID = dbIDs.String() + "-UUID" acls = append(acls, acl) } - for i, ipBlock := range ipBlocks { - match := fmt.Sprintf("ip4.%s == %s && %s == @%s", ipDir, ipBlock, portDir, pgName) + if len(ipBlocks) > 0 { + var ipBlockMatches []string + for _, ipBlock := range ipBlocks { + ipVersion := "ip4" + if utilnet.IsIPv6CIDRString(ipBlock) { + ipVersion = "ip6" + } + ipBlockMatches = append(ipBlockMatches, fmt.Sprintf("%s.%s == %s", ipVersion, ipDir, ipBlock)) + } + var match string + if len(ipBlockMatches) == 1 { + match = ipBlockMatches[0] + } else { + match = fmt.Sprintf("(%s)", strings.Join(ipBlockMatches, " || ")) + } + match = fmt.Sprintf("%s && %s == @%s", match, portDir, pgName) action := allowAction(params.statelessNetPol) - dbIDs := gp.getNetpolACLDbIDs(i, libovsdbutil.UnspecifiedL4Protocol) + dbIDs := gp.getNetpolACLDbIDs(ipBlockCombinedIdx, libovsdbutil.UnspecifiedL4Protocol) acl := libovsdbops.BuildACL( libovsdbutil.GetACLName(dbIDs), direction, @@ -361,6 +423,17 @@ func getPolicyData(params *netpolDataParams) []libovsdbtest.TestData { acls = append(acls, getGressACLs(i, egress.To, knet.PolicyTypeEgress, params)...) 
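// The getGressACLs change above collapses one-ACL-per-CIDR into a single ACL
// whose match ORs all ipBlocks, with the IP version detected per block. The
// match construction in isolation (a sketch; buildIPBlockMatch is an
// illustrative name, and fmt, strings and utilnet are the imports used above):
//
//	func buildIPBlockMatch(ipBlocks []string, ipDir, portDir, pgName string) string {
//		var parts []string
//		for _, ipBlock := range ipBlocks {
//			ipVersion := "ip4"
//			if utilnet.IsIPv6CIDRString(ipBlock) {
//				ipVersion = "ip6"
//			}
//			parts = append(parts, fmt.Sprintf("%s.%s == %s", ipVersion, ipDir, ipBlock))
//		}
//		match := parts[0]
//		if len(parts) > 1 {
//			match = fmt.Sprintf("(%s)", strings.Join(parts, " || "))
//		}
//		return fmt.Sprintf("%s && %s == @%s", match, portDir, pgName)
//	}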
} + pg := getPolicyPortGroup(params, acls) + + data := []libovsdbtest.TestData{} + for _, acl := range acls { + data = append(data, acl) + } + data = append(data, pg) + return data +} + +func getPolicyPortGroup(params *netpolDataParams, acls []*nbdb.ACL) *nbdb.PortGroup { lsps := []*nbdb.LogicalSwitchPort{} for _, uuid := range params.localPortUUIDs { lsps = append(lsps, &nbdb.LogicalSwitchPort{UUID: uuid}) @@ -375,12 +448,7 @@ func getPolicyData(params *netpolDataParams) []libovsdbtest.TestData { ) pg.UUID = pg.Name + "-UUID" - data := []libovsdbtest.TestData{} - for _, acl := range acls { - data = append(data, acl) - } - data = append(data, pg) - return data + return pg } func newNetpolDataParams(networkPolicy *knet.NetworkPolicy) *netpolDataParams { @@ -776,33 +844,38 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) }) - ginkgo.It("reconciles an existing networkPolicy with empty db", func() { - app.Action = func(*cli.Context) error { - namespace1 := *newNamespace(namespaceName1) - namespace2 := *newNamespace(namespaceName2) - namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) - namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) - // add namespaces to initial Database - initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) - - networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, - namespace2.Name, "", true, true) - startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, - nil, nil) - - _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.DescribeTable("reconciles an existing networkPolicy with empty db", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) + namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) + // add namespaces to initial Database + initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) + + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, + nil, nil) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
- expectedData := getNamespaceWithSinglePolicyExpectedData(
- newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name),
- initialDB.NBData)
- gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData))
- return nil
- }
+ expectedData := getNamespaceWithSinglePolicyExpectedData(
+ newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name),
+ initialDB.NBData)
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData))
+ return nil
+ }
- gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed())
- })
+ gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed())
+ },
+ ginkgo.Entry("with allow ICMP network policy disabled", false),
+ ginkgo.Entry("with allow ICMP network policy enabled", true),
+ )
 ginkgo.It("reconciles an ingress networkPolicy updating an existing ACL", func() {
 app.Action = func(*cli.Context) error {
@@ -956,6 +1029,149 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() {
 }
 gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed())
 })
+
+ ginkgo.It("reconciles existing networkPolicies with legacy ipBlock ACLs", func() {
+ app.Action = func(*cli.Context) error {
+ namespace1 := *newNamespace(namespaceName1)
+ namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil)
+ peer := knet.NetworkPolicyPeer{
+ IPBlock: &knet.IPBlock{
+ CIDR: "1.1.1.1",
+ },
+ }
+ // equivalent rules in one peer
+ networkPolicy1 := newNetworkPolicy(netPolicyName1, namespace1.Name, metav1.LabelSelector{},
+ []knet.NetworkPolicyIngressRule{{
+ From: []knet.NetworkPolicyPeer{peer, peer},
+ }}, nil)
+ // equivalent rules in different peers
+ networkPolicy2 := newNetworkPolicy(netPolicyName2, namespace1.Name, metav1.LabelSelector{},
+ []knet.NetworkPolicyIngressRule{
+ {
+ From: []knet.NetworkPolicyPeer{peer},
+ },
+ {
+ From: []knet.NetworkPolicyPeer{peer},
+ },
+ }, nil)
+ initialData := initialDB.NBData
+ initialData = append(initialData, namespace1AddressSetv4)
+ defaultDenyExpectedData := getDefaultDenyDataMultiplePolicies([]*knet.NetworkPolicy{networkPolicy1, networkPolicy2})
+ initialData = append(initialData, defaultDenyExpectedData...)
+
+ // NetworkPolicy 1 contains a single gress policy that previously
+ // created one legacy ACL per ipBlock. Simulate two legacy ACLs
+ // corresponding to ipBlock indexes 0 and 1 of the gress policy. 
+ // ACL1 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 0
+ // ACL2 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 1
+ netInfo := &util.DefaultNetInfo{}
+ fakeController := getFakeBaseController(netInfo)
+ controllerName := getNetworkControllerName(netInfo.GetNetworkName())
+ pgName1 := fakeController.getNetworkPolicyPGName(namespace1.Name, networkPolicy1.Name)
+ gp1 := gressPolicy{
+ policyNamespace: networkPolicy1.Namespace,
+ policyName: networkPolicy1.Name,
+ policyType: knet.PolicyTypeIngress,
+ idx: 0,
+ controllerName: controllerName,
+ }
+ var legacyACLPolicy1 []*nbdb.ACL
+ for idx := 0; idx < 2; idx++ {
+ legacyACLIDs := gp1.getNetpolACLDbIDs(idx, libovsdbutil.UnspecifiedL4Protocol)
+ legacyACL := libovsdbops.BuildACL(
+ libovsdbutil.GetACLName(legacyACLIDs),
+ nbdb.ACLDirectionToLport,
+ types.DefaultAllowPriority,
+ fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName1),
+ nbdb.ACLActionAllowRelated,
+ types.OvnACLLoggingMeter,
+ "",
+ false,
+ legacyACLIDs.GetExternalIDs(),
+ nil,
+ types.DefaultACLTier,
+ )
+ legacyACL.UUID = legacyACLIDs.String() + "-UUID"
+ initialData = append(initialData, legacyACL)
+ legacyACLPolicy1 = append(legacyACLPolicy1, legacyACL)
+ }
+ pgNetworkPolicy1 := getPolicyPortGroup(newNetpolDataParams(networkPolicy1), legacyACLPolicy1)
+ initialData = append(initialData, pgNetworkPolicy1)
+
+ // NetworkPolicy 2 contains two gress policies, each with one legacy
+ // ACL per ipBlock. Simulate two legacy ACLs corresponding to gress
+ // policy indexes 0 and 1, respectively.
+ // ACL1 => libovsdbops.GressIdxKey: 0, libovsdbops.IpBlockIndexKey: 0
+ // ACL2 => libovsdbops.GressIdxKey: 1, libovsdbops.IpBlockIndexKey: 0
+ pgName2 := fakeController.getNetworkPolicyPGName(namespace1.Name, networkPolicy2.Name)
+ firstgp2 := gressPolicy{
+ policyNamespace: networkPolicy2.Namespace,
+ policyName: networkPolicy2.Name,
+ policyType: knet.PolicyTypeIngress,
+ idx: 0,
+ controllerName: controllerName,
+ }
+ secondgp2 := gressPolicy{
+ policyNamespace: networkPolicy2.Namespace,
+ policyName: networkPolicy2.Name,
+ policyType: knet.PolicyTypeIngress,
+ idx: 1,
+ controllerName: controllerName,
+ }
+ legacyACLID := firstgp2.getNetpolACLDbIDs(0, libovsdbutil.UnspecifiedL4Protocol)
+ legacyACL := libovsdbops.BuildACL(
+ libovsdbutil.GetACLName(legacyACLID),
+ nbdb.ACLDirectionToLport,
+ types.DefaultAllowPriority,
+ fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName2),
+ nbdb.ACLActionAllowRelated,
+ types.OvnACLLoggingMeter,
+ "",
+ false,
+ legacyACLID.GetExternalIDs(),
+ nil,
+ types.DefaultACLTier,
+ )
+ legacyACL.UUID = legacyACLID.String() + "-UUID"
+ initialData = append(initialData, legacyACL)
+
+ legacyACLID2 := secondgp2.getNetpolACLDbIDs(0, libovsdbutil.UnspecifiedL4Protocol)
+ legacyACL2 := libovsdbops.BuildACL(
+ libovsdbutil.GetACLName(legacyACLID2),
+ nbdb.ACLDirectionToLport,
+ types.DefaultAllowPriority,
+ fmt.Sprintf("ip4.src == 1.1.1.1 && outport == @%s", pgName2),
+ nbdb.ACLActionAllowRelated,
+ types.OvnACLLoggingMeter,
+ "",
+ false,
+ legacyACLID2.GetExternalIDs(),
+ nil,
+ types.DefaultACLTier,
+ )
+ legacyACL2.UUID = legacyACLID2.String() + "-UUID"
+ initialData = append(initialData, legacyACL2)
+ pgNetworkPolicy2 := getPolicyPortGroup(newNetpolDataParams(networkPolicy2), []*nbdb.ACL{legacyACL, legacyACL2})
+ initialData = append(initialData, pgNetworkPolicy2)
+
+ startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1},
+ []knet.NetworkPolicy{*networkPolicy1, *networkPolicy2},
+ nil,
+ nil)
+
+ // check that the initial data is updated and all legacy ACLs are cleaned up
+ gressPolicy1ExpectedData := getPolicyData(newNetpolDataParams(networkPolicy1))
+ gressPolicy2ExpectedData := getPolicyData(newNetpolDataParams(networkPolicy2))
+ finalData := initialDB.NBData
+ finalData = append(finalData, namespace1AddressSetv4)
+ finalData = append(finalData, gressPolicy1ExpectedData...)
+ finalData = append(finalData, gressPolicy2ExpectedData...)
+ finalData = append(finalData, defaultDenyExpectedData...)
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalData))
+
+ return nil
+ }
+ gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed())
+ })
 })

 ginkgo.Context("during execution", func() {
diff --git a/go-controller/pkg/ovn/routeimport/route_import.go b/go-controller/pkg/ovn/routeimport/route_import.go
index 18c372c276..e99c948edd 100644
--- a/go-controller/pkg/ovn/routeimport/route_import.go
+++ b/go-controller/pkg/ovn/routeimport/route_import.go
@@ -343,11 +343,13 @@ func (c *controller) syncNetwork(network string) error {
 c.setTableForNetworkUnlocked(info.GetNetworkID(), table)
 c.Unlock()

- // skip routes in the pod network
- // TODO do not skip these routes in no overlay mode
- ignoreSubnets := make([]*net.IPNet, len(info.Subnets()))
- for i, subnet := range info.Subnets() {
- ignoreSubnets[i] = subnet.CIDR
+ var ignoreSubnets []*net.IPNet
+ if info.Transport() != types.NetworkTransportNoOverlay {
+ // if the network is in overlay mode, skip routes to the pod network
+ ignoreSubnets = make([]*net.IPNet, len(info.Subnets()))
+ for i, subnet := range info.Subnets() {
+ ignoreSubnets[i] = subnet.CIDR
+ }
 }

 expected, err := c.getBGPRoutes(table, ignoreSubnets)
@@ -431,6 +433,7 @@ func (c *controller) getBGPRoutes(table int, ignoreSubnets []*net.IPNet) (sets.S
 routes := sets.New[route]()
 for _, nlroute := range nlroutes {
 if util.IsContainedInAnyCIDR(nlroute.Dst, ignoreSubnets...) {
+ c.log.V(5).Info("Ignore BGP route", "table", table, "route", stringer{nlroute})
 continue
 }
 routes.Insert(routesFromNetlinkRoute(&nlroute)...)
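For readers skimming the hunk above, here is a minimal, self-contained sketch of the new gating logic; it is not part of the patch. `networkTransportNoOverlay` and `containedInAny` are simplified stand-ins for the patch's `types.NetworkTransportNoOverlay` and `util.IsContainedInAnyCIDR`, and `filterRoutes` is a hypothetical helper condensing the `syncNetwork`/`getBGPRoutes` flow:

```go
package main

import (
	"fmt"
	"net"
)

// Stand-in for types.NetworkTransportNoOverlay from the patch.
const networkTransportNoOverlay = "no-overlay"

// containedInAny is a simplified stand-in for util.IsContainedInAnyCIDR:
// it reports whether dst is fully contained in one of the given subnets.
func containedInAny(dst *net.IPNet, subnets []*net.IPNet) bool {
	dstOnes, _ := dst.Mask.Size()
	for _, subnet := range subnets {
		ones, _ := subnet.Mask.Size()
		if subnet.Contains(dst.IP) && ones <= dstOnes {
			return true
		}
	}
	return false
}

// filterRoutes mirrors the change above: pod-network destinations are
// ignored unless the network runs in no-overlay mode.
func filterRoutes(transport string, podSubnets, routes []*net.IPNet) []*net.IPNet {
	var ignoreSubnets []*net.IPNet
	if transport != networkTransportNoOverlay {
		// Overlay mode: traffic to pod subnets stays on the overlay,
		// so BGP routes covering them must not be imported.
		ignoreSubnets = podSubnets
	}
	var kept []*net.IPNet
	for _, r := range routes {
		if containedInAny(r, ignoreSubnets) {
			continue // analogous to the "Ignore BGP route" debug log
		}
		kept = append(kept, r)
	}
	return kept
}

func main() {
	_, pod, _ := net.ParseCIDR("10.128.0.0/16")
	_, external, _ := net.ParseCIDR("1.1.1.0/24")
	_, hostSubnet, _ := net.ParseCIDR("10.128.1.0/24")
	routes := []*net.IPNet{external, hostSubnet}

	fmt.Println(filterRoutes("geneve", []*net.IPNet{pod}, routes))
	// [1.1.1.0/24] - the host subnet route is skipped

	fmt.Println(filterRoutes(networkTransportNoOverlay, []*net.IPNet{pod}, routes))
	// [1.1.1.0/24 10.128.1.0/24] - both routes are imported
}
```

The design choice this reflects: in overlay mode the pod subnets are already reachable through the overlay, so importing BGP routes for them would bypass it; in no-overlay mode those routes are exactly the ones that must be programmed, which is what the tests below exercise.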
diff --git a/go-controller/pkg/ovn/routeimport/route_import_test.go b/go-controller/pkg/ovn/routeimport/route_import_test.go index cf71a392a8..b79f93101d 100644 --- a/go-controller/pkg/ovn/routeimport/route_import_test.go +++ b/go-controller/pkg/ovn/routeimport/route_import_test.go @@ -2,6 +2,7 @@ package routeimport import ( "errors" + "net" "sync" "testing" @@ -13,6 +14,7 @@ import ( "k8s.io/client-go/util/workqueue" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" controllerutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" ovntesting "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -26,16 +28,33 @@ import ( func Test_controller_syncNetwork(t *testing.T) { node := "testnode" + // Capture original global config values and restore after test + origClusterSubnets := config.Default.ClusterSubnets + t.Cleanup(func() { + config.Default.ClusterSubnets = origClusterSubnets + }) + defaultNetwork := &util.DefaultNetInfo{} defaultNetworkRouter := defaultNetwork.GetNetworkScopedGWRouterName(node) defaultNetworkRouterPort := types.GWRouterToExtSwitchPrefix + defaultNetworkRouter + config.Default.ClusterSubnets = []config.CIDRNetworkEntry{ + { + CIDR: &net.IPNet{ + IP: net.IPv4(10, 128, 0, 0), + Mask: net.CIDRMask(16, 32), + }, + HostSubnetLength: 24, + }, + } + udn := &multinetworkmocks.NetInfo{} udn.On("IsDefault").Return(false) udn.On("GetNetworkName").Return("udn") udn.On("GetNetworkID").Return(1) udn.On("Subnets").Return(nil) udn.On("GetNetworkScopedGWRouterName", node).Return("router") + udn.On("Transport").Return("") cudn := &multinetworkmocks.NetInfo{} cudn.On("IsDefault").Return(false) @@ -43,6 +62,7 @@ func Test_controller_syncNetwork(t *testing.T) { cudn.On("GetNetworkID").Return(2) cudn.On("Subnets").Return(nil) cudn.On("GetNetworkScopedGWRouterName", node).Return("router") + cudn.On("Transport").Return("") type fields struct { networkIDs map[int]string @@ -52,16 +72,17 @@ func Test_controller_syncNetwork(t *testing.T) { network string } tests := []struct { - name string - fields fields - args args - initial []libovsdb.TestData - expected []libovsdb.TestData - routes []netlink.Route - link netlink.Link - linkErr bool - routesErr bool - wantErr bool + name string + fields fields + args args + initial []libovsdb.TestData + expected []libovsdb.TestData + routes []netlink.Route + link netlink.Link + noOverlayEnabled bool + linkErr bool + routesErr bool + wantErr bool }{ { name: "ignored if network not known", @@ -168,11 +189,61 @@ func Test_controller_syncNetwork(t *testing.T) { &nbdb.LogicalRouterStaticRoute{UUID: "untouched-1", IPPrefix: "3.3.3.0/24", Nexthop: "3.3.3.2", ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, }, }, + { + name: "ignores host subnet routes as necessary in overlay mode", + args: args{"default"}, + fields: fields{ + networkIDs: map[int]string{0: "default"}, + networks: map[string]util.NetInfo{"default": defaultNetwork}, + }, + link: &netlink.Vrf{Table: unix.RT_TABLE_MAIN}, + initial: []libovsdb.TestData{ + &nbdb.LogicalRouter{Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + routes: []netlink.Route{ + {Dst: ovntesting.MustParseIPNet("1.1.1.0/24"), Gw: ovntesting.MustParseIP("1.1.1.1")}, 
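+ // 10.128.1.0/24 falls inside the 10.128.0.0/16 cluster subnet configured above, so the overlay-mode sync is expected to filter it out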
+ {Dst: ovntesting.MustParseIPNet("10.128.1.0/24"), Gw: ovntesting.MustParseIP("2.2.2.1")}, + }, + expected: []libovsdb.TestData{ + &nbdb.LogicalRouter{UUID: "router", Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + }, + { + name: "adds host subnet routes as necessary in no-overlay mode", + noOverlayEnabled: true, + args: args{"default"}, + fields: fields{ + networkIDs: map[int]string{0: "default"}, + networks: map[string]util.NetInfo{"default": defaultNetwork}, + }, + link: &netlink.Vrf{Table: unix.RT_TABLE_MAIN}, + initial: []libovsdb.TestData{ + &nbdb.LogicalRouter{Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + routes: []netlink.Route{ + {Dst: ovntesting.MustParseIPNet("1.1.1.0/24"), Gw: ovntesting.MustParseIP("1.1.1.1")}, + {Dst: ovntesting.MustParseIPNet("10.128.1.0/24"), Gw: ovntesting.MustParseIP("2.2.2.1")}, + }, + expected: []libovsdb.TestData{ + &nbdb.LogicalRouter{UUID: "router", Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1", "add-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + &nbdb.LogicalRouterStaticRoute{UUID: "add-1", IPPrefix: "10.128.1.0/24", Nexthop: "2.2.2.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := gomega.NewWithT(t) + // Capture and restore global config value for this subtest + origTransport := config.Default.Transport + t.Cleanup(func() { + config.Default.Transport = origTransport + }) + testError := errors.New("test forced error or incorrect test arguments") network := tt.fields.networks[tt.args.network] @@ -211,6 +282,10 @@ func Test_controller_syncNetwork(t *testing.T) { netlink: nlmock, } + if tt.noOverlayEnabled { + config.Default.Transport = types.NetworkTransportNoOverlay + } + err = c.syncNetwork(tt.args.network) if tt.wantErr { g.Expect(err).To(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/topology/topologyfactory.go b/go-controller/pkg/ovn/topology/topologyfactory.go index ead14e05b2..080b306b23 100644 --- a/go-controller/pkg/ovn/topology/topologyfactory.go +++ b/go-controller/pkg/ovn/topology/topologyfactory.go @@ -36,7 +36,7 @@ func (gtf *GatewayTopologyFactory) NewClusterRouterWithMulticastSupport( netInfo util.NetInfo, coopUUID string, ) (*nbdb.LogicalRouter, error) { - routerOptions := map[string]string{"mcast_relay": "true"} + routerOptions := map[string]string{"mcast_relay": "true", "always_learn_from_arp_request": "false"} return gtf.newClusterRouter(clusterRouterName, netInfo, coopUUID, routerOptions) } diff --git a/go-controller/pkg/ovn/topology/topologyfactory_test.go b/go-controller/pkg/ovn/topology/topologyfactory_test.go index 4d189e030a..dbed43ffb5 100644 --- a/go-controller/pkg/ovn/topology/topologyfactory_test.go +++ 
b/go-controller/pkg/ovn/topology/topologyfactory_test.go @@ -88,7 +88,7 @@ var _ = Describe("Topology factory", func() { ovntypes.TopologyExternalID: ovntypes.Layer3Topology, "k8s-cluster-router": "yes", } - expectedOptions := map[string]string{"mcast_relay": "true"} + expectedOptions := map[string]string{"mcast_relay": "true", "always_learn_from_arp_request": "false"} Expect(clusterRouter).To( WithTransform( removeUUID, diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go index b838221892..13a370ef90 100644 --- a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go @@ -160,12 +160,18 @@ func (zch *ZoneChassisHandler) createOrUpdateNodeChassis(node *corev1.Node, isRe } chassis := sbdb.Chassis{ - Name: chassisID, - Hostname: node.Name, + Name: chassisID, OtherConfig: map[string]string{ "is-remote": strconv.FormatBool(isRemote), }, } + if isRemote { + // For debugging purposes we add KAPI node name as the chassis hostname. + // It is not used for anything other than a helpful hint for debugging. + // There is no need to set it for the local node, as ovn-controller will + // set it automatically from the OVS external_id:hostname field. + chassis.Hostname = node.Name + } return libovsdbops.CreateOrUpdateChassis(zch.sbClient, &chassis, encaps...) } diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go index 23a310c9ab..b269fd2052 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "strconv" + "strings" "time" corev1 "k8s.io/api/core/v1" @@ -173,12 +174,20 @@ func (zic *ZoneInterconnectHandler) createOrUpdateTransitSwitch(networkID int) e // ensureTransitSwitch sets up the global transit switch required for interoperability with other zones // Must wait for network id to be annotated to any node by cluster manager -func (zic *ZoneInterconnectHandler) ensureTransitSwitch(nodes []*corev1.Node) error { - if len(nodes) == 0 { // nothing to do - return nil - } +func (zic *ZoneInterconnectHandler) ensureTransitSwitch() error { start := time.Now() + // Get the transit switch. 
If its not present no cleanup to do + ts := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + } + + _, err := libovsdbops.GetLogicalSwitch(zic.nbClient, ts) + if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { + return err + } + + // Create the transit switch if it doesn't exist if err := zic.createOrUpdateTransitSwitch(zic.GetNetworkID()); err != nil { return err } @@ -198,6 +207,10 @@ func (zic *ZoneInterconnectHandler) AddLocalZoneNode(node *corev1.Node) error { return fmt.Errorf("failed to get node id for node - %s", node.Name) } + if err := zic.ensureTransitSwitch(); err != nil { + return fmt.Errorf("ensuring transit switch for local zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + if err := zic.createLocalZoneNodeResources(node, nodeID); err != nil { return fmt.Errorf("creating interconnect resources for local zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) } @@ -257,6 +270,10 @@ func (zic *ZoneInterconnectHandler) AddRemoteZoneNode(node *corev1.Node) error { } } + if err := zic.ensureTransitSwitch(); err != nil { + return fmt.Errorf("ensuring transit switch for remote zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + klog.Infof("Creating interconnect resources for remote zone node %s for the network %s", node.Name, zic.GetNetworkName()) if err := zic.createRemoteZoneNodeResources(node, nodeID, nodeTransitSwitchPortIPs, nodeSubnets, nodeGRPIPs); err != nil { @@ -273,58 +290,94 @@ func (zic *ZoneInterconnectHandler) DeleteNode(node *corev1.Node) error { return zic.cleanupNode(node.Name) } -// SyncNodes ensures a transit switch exists and cleans up the interconnect -// resources present in the OVN Northbound db for the stale nodes -func (zic *ZoneInterconnectHandler) SyncNodes(objs []interface{}) error { +// CleanupStaleNodes cleans up the interconnect resources for stale nodes. +func (zic *ZoneInterconnectHandler) CleanupStaleNodes(objs []interface{}) error { + // Build set of current node names foundNodeNames := sets.New[string]() - foundNodes := make([]*corev1.Node, len(objs)) - for i, obj := range objs { + for _, obj := range objs { node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("spurious object in syncNodes: %v", obj) + return fmt.Errorf("spurious object in CleanupStaleNodes: %v", obj) } foundNodeNames.Insert(node.Name) - foundNodes[i] = node } + staleNodeNames := sets.New[string]() - // Get the transit switch. If its not present no cleanup to do + // Get the transit switch ts := &nbdb.LogicalSwitch{ Name: zic.networkTransitSwitchName, } - ts, err := libovsdbops.GetLogicalSwitch(zic.nbClient, ts) - if err != nil { - if errors.Is(err, libovsdbclient.ErrNotFound) { - // This can happen for the first time when interconnect is enabled. 
- // Let's ensure the transit switch exists - return zic.ensureTransitSwitch(foundNodes) - } + if err == nil { + // Transit switch exists - find stale nodes by checking transit switch ports + for _, p := range ts.Ports { + lp := &nbdb.LogicalSwitchPort{ + UUID: p, + } - return err - } + lp, err := libovsdbops.GetLogicalSwitchPort(zic.nbClient, lp) + if err != nil { + continue + } + + if lp.ExternalIDs == nil { + continue + } - staleNodeNames := []string{} - for _, p := range ts.Ports { - lp := &nbdb.LogicalSwitchPort{ - UUID: p, + lportNode := lp.ExternalIDs["node"] + if lportNode != "" && !foundNodeNames.Has(lportNode) { + staleNodeNames.Insert(lportNode) + } + } + } else if errors.Is(err, libovsdbclient.ErrNotFound) { + // Transit switch doesn't exist - discover nodes from cluster router resources + lr := &nbdb.LogicalRouter{Name: zic.networkClusterRouterName} + lr, err = libovsdbops.GetLogicalRouter(zic.nbClient, lr) + if err != nil { + if !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to get cluster router: %w", err) + } + // Router doesn't exist, nothing to cleanup + return nil } - lp, err = libovsdbops.GetLogicalSwitchPort(zic.nbClient, lp) + // Discover remote zone nodes from static routes with ic-node external ID + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(zic.nbClient, lr, p) if err != nil { - continue + return fmt.Errorf("failed to get static routes for cluster router: %w", err) } - if lp.ExternalIDs == nil { - continue + for _, route := range routes { + nodeName := route.ExternalIDs["ic-node"] + if nodeName != "" && !foundNodeNames.Has(nodeName) { + staleNodeNames.Insert(nodeName) + } } - lportNode := lp.ExternalIDs["node"] - if !foundNodeNames.Has(lportNode) { - staleNodeNames = append(staleNodeNames, lportNode) + // Discover local zone nodes from router ports connecting to transit switch + routerPortPrefix := zic.GetNetworkScopedName(types.RouterToTransitSwitchPrefix) + for _, portUUID := range lr.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(zic.nbClient, &nbdb.LogicalRouterPort{UUID: portUUID}) + if err != nil { + continue + } + // Extract node name from port name (e.g., "rtots-node1" -> "node1") + if nodeName, found := strings.CutPrefix(lrp.Name, routerPortPrefix); found { + if nodeName != "" && !foundNodeNames.Has(nodeName) { + staleNodeNames.Insert(nodeName) + } + } } + } else { + // Unexpected error + return fmt.Errorf("unexpected error while getting transit switch: %w", err) } - for _, staleNodeName := range staleNodeNames { + // Cleanup stale interconnect resources + for _, staleNodeName := range staleNodeNames.UnsortedList() { if err := zic.cleanupNode(staleNodeName); err != nil { klog.Errorf("Failed to cleanup the interconnect resources from OVN Northbound db for the stale node %s: %v", staleNodeName, err) } @@ -333,10 +386,25 @@ func (zic *ZoneInterconnectHandler) SyncNodes(objs []interface{}) error { return nil } -// Cleanup deletes the transit switch for the network +// Cleanup deletes all interconnect resources for the network, including all node resources +// (ports, router ports, static routes) and the transit switch itself. This method is idempotent +// and safe to call multiple times. 
func (zic *ZoneInterconnectHandler) Cleanup() error { + klog.Infof("Cleaning up all interconnect resources for network %s", zic.GetNetworkName()) + + // First cleanup all node resources (ports, routes, etc.) + // Passing nil removes all nodes from the transit switch + if err := zic.CleanupStaleNodes(nil); err != nil { + return fmt.Errorf("failed to cleanup node resources: %w", err) + } + + // Then delete the transit switch klog.Infof("Deleting the transit switch %s for the network %s", zic.networkTransitSwitchName, zic.GetNetworkName()) - return libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName) + if err := libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName); err != nil && + !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to delete transit switch: %w", err) + } + return nil } // AddTransitSwitchConfig is only used by the layer2 network controller @@ -384,7 +452,6 @@ func (zic *ZoneInterconnectHandler) addTransitSwitchConfig(sw *nbdb.LogicalSwitc } // createLocalZoneNodeResources creates the local zone node resources for interconnect -// - creates Transit switch if it doesn't yet exit // - creates a logical switch port of type "router" in the transit switch with the name as - .tstor- // Eg. if the node name is ovn-worker and the network is default, the name would be - tstor-ovn-worker // if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor-ovn-worker @@ -442,7 +509,6 @@ func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.No } // createRemoteZoneNodeResources creates the remote zone node resources -// - creates Transit switch if it doesn't yet exit // - creates a logical port of type "remote" in the transit switch with the name as - .tstor. // Eg. if the node name is ovn-worker and the network is default, the name would be - tstor.ovn-worker // if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor.ovn-worker @@ -460,9 +526,18 @@ func (zic *ZoneInterconnectHandler) createRemoteZoneNodeResources(node *corev1.N remotePortAddr = remotePortAddr + " " + tsNetwork } + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } + lspOptions := map[string]string{ libovsdbops.RequestedTnlKey: strconv.Itoa(nodeID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, } // Store the node name in the external_ids column for book keeping externalIDs := map[string]string{ diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go index e138037031..76f872e00b 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go @@ -35,8 +35,8 @@ const ( // ovnNodeZoneNameAnnotation is the node annotation name to store the node zone name. ovnNodeZoneNameAnnotation = "k8s.ovn.org/zone-name" - // ovnNodeChassisIDAnnotatin is the node annotation name to store the node chassis id. - ovnNodeChassisIDAnnotatin = "k8s.ovn.org/node-chassis-id" + // ovnNodeChassisIDAnnotation is the node annotation name to store the node chassis id. 
+ ovnNodeChassisIDAnnotation = "k8s.ovn.org/node-chassis-id" // ovnNodeSubnetsAnnotation is the node annotation name to store the node subnets. ovnNodeSubnetsAnnotation = "k8s.ovn.org/node-subnets" @@ -298,7 +298,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.2.0/24\"]}", @@ -315,7 +315,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.3.0/24\"]}", @@ -332,7 +332,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.4.0/24\"]}", @@ -562,11 +562,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + // Call ICHandler CleanupStaleNodes function removing the testNode3 from the list of nodes var kNodes []interface{} kNodes = append(kNodes, &testNode1) kNodes = append(kNodes, &testNode2) - err = zoneICHandler.SyncNodes(kNodes) + err = zoneICHandler.CleanupStaleNodes(kNodes) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -583,6 +583,239 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + ginkgo.It("CleanupStaleNodes with nil should cleanup all transit switch ports for no-overlay migration", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient, nil) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + // Create transit switch and add nodes (simulating previous overlay configuration) + err 
= zoneICHandler.createOrUpdateTransitSwitch(0) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Set up nodes: testNode1 as local zone, testNode2 and testNode3 as remote zones + testNode2.Annotations[ovnNodeZoneNameAnnotation] = "remote-zone-1" + testNode3.Annotations[ovnNodeZoneNameAnnotation] = "remote-zone-2" + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch exists with ports + ts, err := libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(ts.Ports).NotTo(gomega.BeEmpty(), "Transit switch should have ports before cleanup") + + // Verify IC router ports exist (for local zone node) + clusterRouter, err := libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + icRouterPorts := 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Should have router port for local zone node before cleanup") + + // Verify IC static routes exist (for remote zone nodes) + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).NotTo(gomega.BeEmpty(), "Should have IC static routes for remote zone nodes before cleanup") + + // Call CleanupStaleNodes with nil to simulate no-overlay migration + // nil means "no current IC nodes", so all nodes become stale and should be cleaned up + err = zoneICHandler.CleanupStaleNodes(nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify all transit switch ports are cleaned up + ts, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(ts.Ports).To(gomega.BeEmpty(), "Transit switch ports should be cleaned up") + + // Verify all IC router ports are cleaned up (local zone node resources) + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(0), "All IC router ports should be cleaned up") + + // Verify all IC static routes are cleaned up (remote zone node resources) + routes, err = libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).To(gomega.BeEmpty(), 
"All IC static routes should be cleaned up") + + // Now call Cleanup to remove all interconnect resources (transit switch and any remaining nodes) + err = zoneICHandler.Cleanup() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch is deleted + _, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).To(gomega.MatchError(libovsdbclient.ErrNotFound)) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("CleanupStaleNodes with nil should cleanup orphaned IC resources when transit switch doesn't exist", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient, nil) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + // Create transit switch and add nodes (simulating previous IC configuration) + err = zoneICHandler.createOrUpdateTransitSwitch(0) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Add testNode1 as local zone, testNode2 and testNode3 as remote zone + testNode2.Annotations[ovnNodeZoneNameAnnotation] = "remote" + testNode3.Annotations[ovnNodeZoneNameAnnotation] = "remote" + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify IC resources exist + clusterRouter, err := libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Count IC router ports (for local zone nodes) + icRouterPorts := 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Should have router port for local zone node (node1)") + + // Count IC static routes (for remote zone nodes) + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).ToNot(gomega.BeEmpty(), "Should have IC static routes for remote zone nodes (node2, node3)") + + // Manually delete the transit switch to simulate the resource leak scenario + // This leaves orphaned router ports and 
static routes + err = libovsdbops.DeleteLogicalSwitch(libovsdbOvnNBClient, types.TransitSwitch) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch is gone + _, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).To(gomega.MatchError(libovsdbclient.ErrNotFound)) + + // Verify orphaned resources still exist + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Router port should still exist before cleanup (the leak)") + + routes, err = libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).ToNot(gomega.BeEmpty(), "IC static routes should still exist before cleanup (the leak)") + + // Call CleanupStaleNodes with nil - should discover all nodes and clean them + err = zoneICHandler.CleanupStaleNodes(nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify all router ports are cleaned up + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(0), "All router ports should be cleaned up") + + // Verify all IC static routes are cleaned up + routes, err = libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).To(gomega.BeEmpty(), "All IC static routes should be cleaned up") + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("Secondary networks", func() { @@ -591,7 +824,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.2.0/24\"]}", @@ -608,7 +841,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: 
"cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.3.0/24\"]}", @@ -625,7 +858,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.4.0/24\"]}", @@ -718,11 +951,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + // Call ICHandler CleanupStaleNodes function removing the testNode3 from the list of nodes var kNodes []interface{} kNodes = append(kNodes, &testNode1) kNodes = append(kNodes, &testNode2) - err = zoneICHandler.SyncNodes(kNodes) + err = zoneICHandler.CleanupStaleNodes(kNodes) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -746,7 +979,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.2.0/24\"], \"blue\":[\"11.244.2.0/24\"]}", @@ -763,7 +996,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.3.0/24\"], \"blue\":[\"11.244.3.0/24\"]}", @@ -780,7 +1013,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.4.0/24\"], \"blue\":[\"11.244.4.0/24\"]}", @@ -1004,6 +1237,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { // Set the node transit switch port ips testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"100.88.0.5/16\"}" err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("k8s.ovn.org/node-chassis-id annotation not found for node node4"))) + + // Set chassis-id annotation + testNode4.Annotations[ovnNodeChassisIDAnnotation] = "c44f341d-2862-4fbe-8b93-10e98b0fa84f" + err = zoneICHandler.AddRemoteZoneNode(&testNode4) gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("failed to create static route ops: unable to get logical router static routes with predicate on router ovn_cluster_router"))) // 
Create the cluster router diff --git a/go-controller/pkg/ovnwebhook/nodeadmission.go b/go-controller/pkg/ovnwebhook/nodeadmission.go index e7dc733371..74cfe79e46 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission.go @@ -59,14 +59,6 @@ var commonNodeAnnotationChecks = map[string]checkNodeAnnot{ // interconnectNodeAnnotationChecks holds annotations allowed for ovnkube-node: users in IC environments var interconnectNodeAnnotationChecks = map[string]checkNodeAnnot{ - util.OvnNodeMigratedZoneName: func(v annotationChange, nodeName string) error { - // it is allowed for the annotation to be set to - if (v.action == added || v.action == changed) && v.value == nodeName { - return nil - } - - return fmt.Errorf("%s can only be set to %s, it cannot be removed", util.OvnNodeMigratedZoneName, nodeName) - }, util.Layer2TopologyVersion: func(v annotationChange, _ string) error { // it is allowed for the annotation to be added or removed if v.action == added || v.action == removed { diff --git a/go-controller/pkg/ovnwebhook/nodeadmission_test.go b/go-controller/pkg/ovnwebhook/nodeadmission_test.go index 53975940b3..bb3a946533 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission_test.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission_test.go @@ -403,47 +403,6 @@ func TestNodeAdmission_ValidateUpdate(t *testing.T) { }) } } -func TestNodeAdmission_ValidateUpdateIC(t *testing.T) { - adm := NewNodeAdmissionWebhook(true, false) - tests := []struct { - name string - ctx context.Context - oldObj runtime.Object - newObj runtime.Object - expectedErr error - }{ - { - name: "ovnkube-node cannot set util.OvnNodeMigratedZoneName to anything else than ", - ctx: admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: userName, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: "global"}, - }, - }, - expectedErr: fmt.Errorf("user: %q is not allowed to set %s on node %q: %s can only be set to %s, it cannot be removed", userName, util.OvnNodeMigratedZoneName, nodeName, util.OvnNodeMigratedZoneName, nodeName), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := adm.ValidateUpdate(tt.ctx, tt.oldObj, tt.newObj) - if err != tt.expectedErr && err.Error() != tt.expectedErr.Error() { - t.Errorf("ValidateUpdateIC() error = %v, wantErr %v", err, tt.expectedErr) - return - } - }) - } -} - func TestNodeAdmission_ValidateUpdateHybridOverlay(t *testing.T) { adm := NewNodeAdmissionWebhook(false, true) tests := []struct { @@ -502,64 +461,3 @@ func TestNodeAdmission_ValidateUpdateHybridOverlay(t *testing.T) { }) } } - -func TestNodeAdmission_ValidateUpdateExtraUsers(t *testing.T) { - extraUser := "system:serviceaccount:ovnkube-cluster-manager" - adm := NewNodeAdmissionWebhook(true, false, extraUser) - tests := []struct { - name string - ctx context.Context - oldObj runtime.Object - newObj runtime.Object - expectedErr error - }{ - { - name: "extra user cannot set util.OvnNodeMigratedZoneName to anything else than ", - ctx: admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: extraUser, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: 
nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: "global"}, - }, - }, - expectedErr: fmt.Errorf("user: %q is not allowed to set %s on node %q: %s can only be set to %s, it cannot be removed", extraUser, util.OvnNodeMigratedZoneName, nodeName, util.OvnNodeMigratedZoneName, nodeName), - }, - { - name: "extra user can set util.OvnNodeMigratedZoneName to ", - ctx: admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: extraUser, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: nodeName}, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := adm.ValidateUpdate(tt.ctx, tt.oldObj, tt.newObj) - if err != tt.expectedErr && err.Error() != tt.expectedErr.Error() { - t.Errorf("ValidateUpdateIC() error = %v, wantErr %v", err, tt.expectedErr) - return - } - }) - } -} diff --git a/go-controller/pkg/retry/obj_retry.go b/go-controller/pkg/retry/obj_retry.go index c9de84d8c2..27e93d001f 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -415,7 +415,9 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { } } - klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + if entry.failedAttempts > 0 { + klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + } if initObj != nil { r.ResourceHandler.RecordSuccessEvent(initObj) } @@ -489,13 +491,13 @@ func (r *RetryFramework) processObjectInTerminalState(obj interface{}, lockedKey _, loaded := r.terminatedObjects.LoadOrStore(lockedKey, true) if loaded { // object was already terminated - klog.Infof("Detected object %s of type %s in terminal state (e.g. completed) will be "+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. completed) will be "+ "ignored as it has already been processed", lockedKey, r.ResourceHandler.ObjType) return } // The object is in a terminal state: delete it from the cluster, delete its retry entry and return. - klog.Infof("Detected object %s of type %s in terminal state (e.g. completed)"+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. 
completed)"+ " during %s event: will remove it", lockedKey, r.ResourceHandler.ObjType, event) internalCacheEntry := r.ResourceHandler.GetInternalCacheEntry(obj) retryEntry := r.initRetryObjWithDelete(obj, lockedKey, internalCacheEntry, true) // set up the retry obj for deletion @@ -597,8 +599,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin r.ResourceHandler.ObjType, err) return } - klog.V(5).Infof("Update event received for resource %s, old object is equal to new: %t", - r.ResourceHandler.ObjType, areEqual) if areEqual { return } @@ -650,7 +650,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin } klog.V(5).Infof("Update event received for %s %s", r.ResourceHandler.ObjType, newKey) - r.DoWithLock(newKey, func(key string) { // STEP 1: // Delete existing (old) object if: diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 1b26e7b9d5..fc979dec24 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -252,6 +252,11 @@ const ( NetworkRoleInfrastructure = "infrastructure-locked" NetworkRoleNone = "none" + // Network transport types - canonical format (lowercase) + NetworkTransportGeneve = "geneve" + NetworkTransportNoOverlay = "no-overlay" + NetworkTransportEVPN = "evpn" + // db index keys // PrimaryIDKey is used as a primary client index PrimaryIDKey = OvnK8sPrefix + "/id" diff --git a/go-controller/pkg/util/dns.go b/go-controller/pkg/util/dns.go index 9466ad16f5..86d8a9e054 100644 --- a/go-controller/pkg/util/dns.go +++ b/go-controller/pkg/util/dns.go @@ -16,8 +16,12 @@ import ( ) const ( - // defaultTTL is used if an invalid or zero TTL is provided. - defaultTTL = 30 * time.Minute + // defaultMinTTL is the minimum TTL value that will be used for a domain name if an invalid or zero TTL is found + defaultMinTTL = 5 * time.Second + // defaultMaxTTL is the maximum TTL value that will be used for a domain name if an invalid or zero TTL is found + defaultMaxTTL = 2 * time.Minute + // maxRetryBeforeBackoff is the maximum number of times to retry a DNS lookup before exponential backoff starts + maxRetryBeforeBackoff = 10 ) type dnsValue struct { @@ -27,6 +31,8 @@ type dnsValue struct { ttl time.Duration // Holds (last dns lookup time + ttl), tells when to refresh IPs next time nextQueryTime time.Time + // Number of times the DNS lookup has been retried before backoff starts + retryCount int } type DNS struct { @@ -105,11 +111,22 @@ func (d *DNS) updateOne(dns string) (bool, error) { return false, fmt.Errorf("DNS value not found in dnsMap for domain: %q", dns) } - ips, ttl, err := d.getIPsAndMinTTL(dns) - if err != nil { - res.nextQueryTime = time.Now().Add(defaultTTL) - d.dnsMap[dns] = res - return false, err + ips, ttl, retry, err := d.getIPsAndMinTTL(dns) + if retry { + // If the DNS lookup has been retried maxRetryCount times, use exponential backoff + // by doubling the previous TTL. The TTL is capped at defaultMaxTTL. + if res.retryCount >= maxRetryBeforeBackoff { + ttl = min(res.ttl*2, defaultMaxTTL) + } else { + // Increment the retry count + res.retryCount++ + } + // If no valid IPs were found, use the previous IPs as fallback. 
+ if len(ips) == 0 {
+ ips = res.ips
+ }
+ } else {
+ res.retryCount = 0
+ }

 changed := false
@@ -120,10 +137,10 @@ func (d *DNS) updateOne(dns string) (bool, error) {
 res.ttl = ttl
 res.nextQueryTime = time.Now().Add(res.ttl)
 d.dnsMap[dns] = res
- return changed, nil
+ return changed, err
}

-func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, error) {
+func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, bool, error) {
 ips := []net.IP{}
 ttlSet := false
 var ttlSeconds uint32
@@ -197,19 +214,27 @@ func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, error) {
 }

 if !ttlSet || (len(ips) == 0) {
- return nil, defaultTTL, fmt.Errorf("IPv4 or IPv6 addr not found for domain: %q, nameservers: %v", domain, d.nameservers)
+ return nil, defaultMinTTL, true, fmt.Errorf("IPv4 or IPv6 addr not found for domain: %q, nameservers: %v", domain, d.nameservers)
 }

+ ips = removeDuplicateIPs(ips)
+
 ttl, err := time.ParseDuration(fmt.Sprintf("%ds", minTTL))
 if err != nil {
- utilruntime.HandleError(fmt.Errorf("invalid TTL value for domain: %q, err: %v, defaulting ttl=%s", domain, err, defaultTTL.String()))
- ttl = defaultTTL
+ utilruntime.HandleError(fmt.Errorf("invalid TTL value for domain: %q, err: %v", domain, err))
+ return ips, defaultMinTTL, true, nil
 }
 if ttl == 0 {
- ttl = defaultTTL
+ // If the TTL is 0, return the default minimum TTL. The retry is set to false as this
+ // is not an error scenario. TTL being 0 is a valid scenario for some DNS servers
+ // and it means that the IP addresses should be refreshed every time the DNS
+ // name is used. From the point of view of OVN-Kubernetes, the IP addresses are
+ // refreshed every defaultMinTTL.
+ klog.V(5).Infof("TTL value is 0 for domain: %q, defaulting ttl=%s", domain, defaultMinTTL.String())
+ return ips, defaultMinTTL, false, nil
 }

- return removeDuplicateIPs(ips), ttl, nil
+ return ips, ttl, false, nil
}

func (d *DNS) GetNextQueryTime() (time.Time, string, bool) {
diff --git a/go-controller/pkg/util/dns_test.go b/go-controller/pkg/util/dns_test.go
index a9d248042b..9f40c176ba 100644
--- a/go-controller/pkg/util/dns_test.go
+++ b/go-controller/pkg/util/dns_test.go
@@ -70,13 +70,16 @@ func TestGetIPsAndMinTTL(t *testing.T) {
 tests := []struct {
 desc string
 errExp bool
+ retry bool
 ipv4Mode bool
 ipv6Mode bool
 dnsOpsMockHelper []ovntest.TestifyMockHelper
+ expectedTTL time.Duration
 }{
 {
 desc: "call to Exchange fails IPv4 only",
 errExp: true,
+ retry: true,
 ipv4Mode: true,
 ipv6Mode: false,
 dnsOpsMockHelper: []ovntest.TestifyMockHelper{
@@ -89,10 +92,12 @@ func TestGetIPsAndMinTTL(t *testing.T) {
 CallTimes: 1,
 },
 },
+ expectedTTL: defaultMinTTL,
 },
 {
 desc: "Exchange returns correctly but Rcode != RcodeSuccess IPv4 only",
 errExp: true,
+ retry: true,
 ipv4Mode: true,
 ipv6Mode: false,
 dnsOpsMockHelper: []ovntest.TestifyMockHelper{
@@ -105,6 +110,46 @@ func TestGetIPsAndMinTTL(t *testing.T) {
 CallTimes: 1,
 },
 },
+ expectedTTL: defaultMinTTL,
+ },
+ {
+ desc: "Exchange returns correctly but with TTL 0 IPv4 only",
+ errExp: false,
+ retry: false,
+ ipv4Mode: true,
+ ipv6Mode: false,
+ dnsOpsMockHelper: []ovntest.TestifyMockHelper{
+ {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1},
+ {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1},
+ {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg",
"string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{A: net.ParseIP("1.2.3.4")}}}, 0 * time.Second, nil}, CallTimes: 1}, + }, + expectedTTL: defaultMinTTL, + }, + { + desc: "Exchange returns correctly but no Answer IPv4 only", + errExp: true, + retry: true, + ipv4Mode: true, + ipv6Mode: false, + dnsOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1}, + {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1}, + {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{}}, 0 * time.Second, nil}, CallTimes: 1}, + }, + expectedTTL: defaultMinTTL, + }, + { + desc: "Exchange returns correctly but with non-zero TTL IPv4 only", + errExp: false, + retry: false, + ipv4Mode: true, + ipv6Mode: false, + dnsOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1}, + {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1}, + {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{Hdr: dns.RR_Header{Ttl: 100}, A: net.ParseIP("1.2.3.4")}}}, 0 * time.Second, nil}, CallTimes: 1}, + }, + expectedTTL: 100 * time.Second, }, } @@ -128,19 +173,22 @@ func TestGetIPsAndMinTTL(t *testing.T) { } config.IPv4Mode = tc.ipv4Mode config.IPv6Mode = tc.ipv6Mode - res, _, err := testDNS.getIPsAndMinTTL("www.test.com") - t.Log(res, err) + res, ttl, retry, err := testDNS.getIPsAndMinTTL("www.test.com") + t.Log(res, ttl, retry, err) if tc.errExp { require.Error(t, err) } else { require.NoError(t, err) } + assert.Equal(t, tc.retry, retry, "the exponentialBackoff variable should match the return from dns.getIPsAndMinTTL()") + assert.Equal(t, tc.expectedTTL, ttl, "the ttl variable should match the return from dns.getIPsAndMinTTL()") mockDNSOps.AssertExpectations(t) }) } } func TestUpdate(t *testing.T) { + config.IPv4Mode = true mockDNSOps := new(util_mocks.DNSOps) SetDNSLibOpsMockInst(mockDNSOps) @@ -252,6 +300,7 @@ func TestUpdate(t *testing.T) { } func TestAdd(t *testing.T) { + config.IPv4Mode = true dnsName := "www.testing.com" mockDNSOps := new(util_mocks.DNSOps) SetDNSLibOpsMockInst(mockDNSOps) @@ -319,3 +368,211 @@ func TestAdd(t *testing.T) { } } + +func TestIPsEqual(t *testing.T) { + tests := []struct { + desc string + oldips []net.IP + newips []net.IP + expEqual bool + }{ + { + desc: "oldips and newips are the same", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.4")}, + expEqual: true, + }, + { + desc: "oldips and newips are different", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips and newips are different length", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is nil and newips is not nil", + oldips: nil, + newips: 
[]net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is empty and newips is not empty", + oldips: []net.IP{}, + newips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is not nil and newips is nil", + oldips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + newips: nil, + expEqual: false, + }, + { + desc: "oldips is not empty and newips is empty", + oldips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + newips: []net.IP{}, + expEqual: false, + }, + { + desc: "oldips and newips are both nil", + oldips: nil, + newips: nil, + expEqual: true, + }, + { + desc: "oldips and newips are both empty", + oldips: []net.IP{}, + newips: []net.IP{}, + expEqual: true, + }, + { + desc: "oldips is nil and newips is empty", + oldips: nil, + newips: []net.IP{}, + expEqual: true, + }, + { + desc: "oldips is empty and newips is nil", + oldips: []net.IP{}, + newips: nil, + expEqual: true, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + res := ipsEqual(tc.oldips, tc.newips) + assert.Equal(t, tc.expEqual, res) + }) + } +} + +func TestUpdateOne(t *testing.T) { + config.IPv4Mode = true + dnsName := "www.testing.com" + newIP := net.ParseIP("1.2.3.4") + fqdnOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{dnsName}, CallTimes: 1, + } + setQuestionOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1, + } + exchangeSuccessNoAnswerOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeSuccessZeroTTLOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{A: newIP}}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeSuccessNonZeroTTLOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{Hdr: dns.RR_Header{Ttl: 100}, A: newIP}}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeFailureOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeServerFailure}}, 0 * time.Second, nil}, CallTimes: 1, + } + tests := []struct { + desc string + numCalls int + exchangeOpsMockHelper ovntest.TestifyMockHelper + expTTL time.Duration + }{ + { + desc: "when Exchange function returns with Rcode != RcodeSuccess, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns successfully but without Answer, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeSuccessNoAnswerOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL 
returned is 0 by Exchange function, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL returned is 0 by Exchange function 2 times, defaultMinTTL is used", + numCalls: 2, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL returned is 0 by Exchange function 11 times, defaultMinTTL is used", + numCalls: 11, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess twice, defaultMinTTL is used", + numCalls: 2, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 10 times, defaultMinTTL is used", + numCalls: 10, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 11 times, defaultMinTTL is doubled", + numCalls: 11, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: 2 * defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 14 times, 16 (2^4) times defaultMinTTL is used", + numCalls: 14, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: 16 * defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 15 times, defaultMaxTTL is used", + numCalls: 15, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMaxTTL, + }, + { + desc: "when TTL returned is non-zero by Exchange function, it is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeSuccessNonZeroTTLOpsMockHelper, + expTTL: 100 * time.Second, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + mockDNSOps := new(util_mocks.DNSOps) + SetDNSLibOpsMockInst(mockDNSOps) + dnsOpsMockHelper := []ovntest.TestifyMockHelper{fqdnOpsMockHelper, setQuestionOpsMockHelper, tc.exchangeOpsMockHelper} + for index := 0; index < tc.numCalls; index++ { + for _, item := range dnsOpsMockHelper { + call := mockDNSOps.On(item.OnCallMethodName) + for _, arg := range item.OnCallMethodArgType { + call.Arguments = append(call.Arguments, mock.AnythingOfType(arg)) + } + for _, ret := range item.RetArgList { + call.ReturnArguments = append(call.ReturnArguments, ret) + } + call.Once() + } + } + dns := DNS{ + dnsMap: make(map[string]dnsValue), + nameservers: []string{"1.1.1.1"}, + } + dns.dnsMap[dnsName] = dnsValue{} + for i := 0; i < tc.numCalls; i++ { + _, _ = dns.updateOne(dnsName) + } + assert.Equal(t, tc.expTTL, dns.dnsMap[dnsName].ttl) + mockDNSOps.AssertExpectations(t) + }) + } +} diff --git a/go-controller/pkg/util/fake_client.go b/go-controller/pkg/util/fake_client.go index 0ca981e849..e78010d572 100644 --- a/go-controller/pkg/util/fake_client.go +++ b/go-controller/pkg/util/fake_client.go @@ -39,6 +39,8 @@ import ( routeadvertisementsfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/fake" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" udnfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1" + vtepfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake" ) func 
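The TestIPsEqual table above fixes the semantics of ipsEqual without showing its body: nil and empty slices compare equal, lengths must match, and every IP needs a counterpart. A hypothetical reimplementation consistent with those cases (the one-to-one matching mirrors IsIPNetsEqual added later in this patch; order-insensitivity is an assumption, since no reordered case is tested):

```go
package main

import (
	"fmt"
	"net"
)

// ipsEqualSketch is a hypothetical reimplementation consistent with the
// TestIPsEqual cases: nil and empty slices compare equal, lengths must
// match, and each IP must be matched exactly once.
func ipsEqualSketch(oldips, newips []net.IP) bool {
	if len(oldips) != len(newips) {
		return false
	}
	used := make([]bool, len(newips))
	for _, ip := range oldips {
		found := false
		for j := range newips {
			if !used[j] && ip.Equal(newips[j]) {
				used[j] = true
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

func main() {
	a := []net.IP{net.ParseIP("1.2.3.4")}
	b := []net.IP{net.ParseIP("1.2.3.4")}
	fmt.Println(ipsEqualSketch(a, b))            // true
	fmt.Println(ipsEqualSketch(nil, []net.IP{})) // true: nil equals empty
	fmt.Println(ipsEqualSketch(nil, b))          // false: lengths differ
}
```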
diff --git a/go-controller/pkg/util/fake_client.go b/go-controller/pkg/util/fake_client.go
index 0ca981e849..e78010d572 100644
--- a/go-controller/pkg/util/fake_client.go
+++ b/go-controller/pkg/util/fake_client.go
@@ -39,6 +39,8 @@ import (
 	routeadvertisementsfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/fake"
 	udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1"
 	udnfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake"
+	vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1"
+	vtepfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake"
 )
 
 func GetOVNClientset(objects ...runtime.Object) *OVNClientset {
@@ -58,6 +60,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset {
 	raObjects := []runtime.Object{}
 	frrObjects := []runtime.Object{}
 	networkConnectObjects := []runtime.Object{}
+	vtepObjects := []runtime.Object{}
 	for _, object := range objects {
 		switch object.(type) {
 		case *egressip.EgressIP:
@@ -90,6 +93,8 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset {
 			networkQoSObjects = append(networkQoSObjects, object)
 		case *networkconnect.ClusterNetworkConnect:
 			networkConnectObjects = append(networkConnectObjects, object)
+		case *vtepv1.VTEP:
+			vtepObjects = append(vtepObjects, object)
 		default:
 			v1Objects = append(v1Objects, object)
 		}
@@ -119,6 +124,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset {
 		FRRClient:            frrfake.NewSimpleClientset(frrObjects...),
 		NetworkQoSClient:     networkqosfake.NewSimpleClientset(networkQoSObjects...),
 		NetworkConnectClient: networkconnectfake.NewSimpleClientset(networkConnectObjects...),
+		VTEPClient:           vtepfake.NewSimpleClientset(vtepObjects...),
 	}
 }
diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go
index ad551442ab..b5e314d315 100644
--- a/go-controller/pkg/util/kube.go
+++ b/go-controller/pkg/util/kube.go
@@ -54,6 +54,7 @@ import (
 	networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned"
 	routeadvertisementsclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned"
 	userdefinednetworkclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned"
+	vtepclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned"
 )
 
 // OVNClientset is a wrapper around all clientsets used by OVN-Kubernetes
@@ -75,6 +76,7 @@ type OVNClientset struct {
 	RouteAdvertisementsClient routeadvertisementsclientset.Interface
 	FRRClient                 frrclientset.Interface
 	NetworkQoSClient          networkqosclientset.Interface
+	VTEPClient                vtepclientset.Interface
 }
 
 // OVNMasterClientset
@@ -95,6 +97,7 @@ type OVNMasterClientset struct {
 	RouteAdvertisementsClient routeadvertisementsclientset.Interface
 	FRRClient                 frrclientset.Interface
 	NetworkQoSClient          networkqosclientset.Interface
+	VTEPClient                vtepclientset.Interface
 }
 
 // OVNKubeControllerClientset
@@ -143,6 +146,7 @@ type OVNClusterManagerClientset struct {
 	RouteAdvertisementsClient routeadvertisementsclientset.Interface
 	FRRClient                 frrclientset.Interface
 	NetworkQoSClient          networkqosclientset.Interface
+	VTEPClient                vtepclientset.Interface
 }
 
 const (
@@ -173,6 +177,7 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset {
 		RouteAdvertisementsClient: cs.RouteAdvertisementsClient,
 		FRRClient:                 cs.FRRClient,
 		NetworkQoSClient:          cs.NetworkQoSClient,
+		VTEPClient:                cs.VTEPClient,
 	}
 }
 
@@ -233,6 +238,7 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset {
 		RouteAdvertisementsClient: cs.RouteAdvertisementsClient,
 		FRRClient:                 cs.FRRClient,
 		NetworkQoSClient:          cs.NetworkQoSClient,
+		VTEPClient:                cs.VTEPClient,
 	}
 }
 
@@ -547,6 +553,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) {
 		return nil, err
 	}
 
+	vtepClientset, err := vtepclientset.NewForConfig(kconfig)
+	if err != nil {
+		return nil, err
+	}
+
 	return &OVNClientset{
 		KubeClient: kclientset,
 		ANPClient:  anpClientset,
@@ -565,6 +576,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) {
 		RouteAdvertisementsClient: routeAdvertisementsClientset,
 		FRRClient:                 frrClientset,
 		NetworkQoSClient:          networkqosClientset,
+		VTEPClient:                vtepClientset,
 	}, nil
 }
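A small, hypothetical usage sketch of the new wiring: VTEP objects handed to GetOVNClientset are routed to the dedicated fake VTEP clientset rather than the core fake client. The ObjectMeta field on the CRD type is assumed here, as the vtep/v1 type definition is not part of this hunk:

```go
package util

import (
	"testing"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1"
)

// Hypothetical illustration only: a VTEP object is seeded into the fake
// VTEP clientset, so VTEP-specific informers and listers can see it while
// the core fake client does not.
func TestGetOVNClientsetRoutesVTEPObjects(t *testing.T) {
	vtep := &vtepv1.VTEP{ObjectMeta: metav1.ObjectMeta{Name: "node1-vtep"}}
	cs := GetOVNClientset(vtep)
	if cs.VTEPClient == nil {
		t.Fatal("expected a VTEP clientset to be constructed")
	}
}
```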
diff --git a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go
index f382625105..edaef77470 100644
--- a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go
+++ b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go
@@ -52,6 +52,24 @@ func (_m *NetInfo) EVPNIPVRFRouteTarget() string {
 	return r0
 }
 
+// EVPNIPVRFVID provides a mock function with no fields
+func (_m *NetInfo) EVPNIPVRFVID() int {
+	ret := _m.Called()
+
+	if len(ret) == 0 {
+		panic("no return value specified for EVPNIPVRFVID")
+	}
+
+	var r0 int
+	if rf, ok := ret.Get(0).(func() int); ok {
+		r0 = rf()
+	} else {
+		r0 = ret.Get(0).(int)
+	}
+
+	return r0
+}
+
 // EVPNIPVRFVNI provides a mock function with no fields
 func (_m *NetInfo) EVPNIPVRFVNI() int32 {
 	ret := _m.Called()
@@ -88,6 +106,24 @@ func (_m *NetInfo) EVPNMACVRFRouteTarget() string {
 	return r0
 }
 
+// EVPNMACVRFVID provides a mock function with no fields
+func (_m *NetInfo) EVPNMACVRFVID() int {
+	ret := _m.Called()
+
+	if len(ret) == 0 {
+		panic("no return value specified for EVPNMACVRFVID")
+	}
+
+	var r0 int
+	if rf, ok := ret.Get(0).(func() int); ok {
+		r0 = rf()
+	} else {
+		r0 = ret.Get(0).(int)
+	}
+
+	return r0
+}
+
 // EVPNMACVRFVNI provides a mock function with no fields
 func (_m *NetInfo) EVPNMACVRFVNI() int32 {
 	ret := _m.Called()
diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go
index 30a8bc4eef..878f8666cd 100644
--- a/go-controller/pkg/util/multi_network.go
+++ b/go-controller/pkg/util/multi_network.go
@@ -60,8 +60,10 @@ type NetInfo interface {
 	EVPNVTEPName() string
 	EVPNMACVRFVNI() int32
 	EVPNMACVRFRouteTarget() string
+	EVPNMACVRFVID() int
 	EVPNIPVRFVNI() int32
 	EVPNIPVRFRouteTarget() string
+	EVPNIPVRFVID() int
 
 	GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet
 	GetNodeManagementIP(hostSubnet *net.IPNet) *net.IPNet
@@ -459,6 +461,8 @@ func (nInfo *mutableNetInfo) getNamespaces() sets.Set[string] {
 }
 
 func (nInfo *mutableNetInfo) GetNADNamespaces() []string {
+	nInfo.RLock()
+	defer nInfo.RUnlock()
 	return nInfo.getNamespaces().UnsortedList()
 }
@@ -658,7 +662,7 @@ func (nInfo *DefaultNetInfo) PhysicalNetworkName() string {
 
 // Transport returns the transport protocol for east-west traffic
 func (nInfo *DefaultNetInfo) Transport() string {
-	return ""
+	return config.Default.Transport
 }
 
 // EVPNVTEPName returns empty as EVPN is not supported on the default network
@@ -686,6 +690,16 @@ func (nInfo *DefaultNetInfo) EVPNIPVRFRouteTarget() string {
 	return ""
 }
 
+// EVPNMACVRFVID returns 0 as EVPN is not supported on the default network
+func (nInfo *DefaultNetInfo) EVPNMACVRFVID() int {
+	return 0
+}
+
+// EVPNIPVRFVID returns 0 as EVPN is not supported on the default network
+func (nInfo *DefaultNetInfo) EVPNIPVRFVID() int {
+	return 0
+}
+
 func (nInfo *DefaultNetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet {
 	return GetNodeGatewayIfAddr(hostSubnet)
 }
@@ -853,6 +867,9 @@ func (nInfo *userDefinedNetInfo) PhysicalNetworkName() string {
 
 // Transport returns the transport protocol for east-west traffic
 func (nInfo *userDefinedNetInfo) Transport() string {
+	if nInfo.transport == "" {
+		return types.NetworkTransportGeneve
+	}
 	return nInfo.transport
 }
@@ -896,6 +913,22 @@ func (nInfo *userDefinedNetInfo) EVPNIPVRFRouteTarget() string {
 	return nInfo.evpn.IPVRF.RouteTarget
 }
 
+// EVPNMACVRFVID returns the MAC-VRF VID for EVPN
+func (nInfo *userDefinedNetInfo) EVPNMACVRFVID() int {
+	if nInfo.evpn == nil || nInfo.evpn.MACVRF == nil {
+		return 0
+	}
+	return nInfo.evpn.MACVRF.VID
+}
+
+// EVPNIPVRFVID returns the IP-VRF VID for EVPN
+func (nInfo *userDefinedNetInfo) EVPNIPVRFVID() int {
+	if nInfo.evpn == nil || nInfo.evpn.IPVRF == nil {
+		return 0
+	}
+	return nInfo.evpn.IPVRF.VID
+}
+
 func (nInfo *userDefinedNetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet {
 	if IsPreconfiguredUDNAddressesEnabled() && nInfo.TopologyType() == types.Layer2Topology && nInfo.IsPrimaryNetwork() {
 		isIPV6 := knet.IsIPv6CIDR(hostSubnet)
@@ -1009,7 +1042,7 @@ func (nInfo *userDefinedNetInfo) canReconcile(other NetInfo) bool {
 	if nInfo.physicalNetworkName != other.PhysicalNetworkName() {
 		return false
 	}
-	if nInfo.transport != other.Transport() {
+	if nInfo.Transport() != other.Transport() {
 		return false
 	}
 	if nInfo.EVPNVTEPName() != other.EVPNVTEPName() {
@@ -1503,6 +1536,18 @@ func ValidateNetConf(nadName string, netconf *ovncnitypes.NetConf) error {
 		return fmt.Errorf("error parsing Network Attachment Definition %s: %w", nadName, ErrorUnsupportedIPAMKey)
 	}
 
+	// Validate transport if specified
+	if netconf.Transport != "" &&
+		netconf.Transport != types.NetworkTransportGeneve &&
+		netconf.Transport != types.NetworkTransportNoOverlay &&
+		netconf.Transport != types.NetworkTransportEVPN {
+		return fmt.Errorf("invalid transport %q: must be one of %q", netconf.Transport, []string{
+			types.NetworkTransportGeneve,
+			types.NetworkTransportNoOverlay,
+			types.NetworkTransportEVPN,
+		})
+	}
+
 	if netconf.JoinSubnet != "" && netconf.Topology == types.LocalnetTopology {
 		return fmt.Errorf("localnet topology does not allow specifying join-subnet as services are not supported")
 	}
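The canReconcile change above is subtle: comparing the raw transport fields would flag an unset transport ("") as different from an explicit "geneve", even though Transport() normalizes both to the Geneve default. A minimal sketch of the idea, with the constant's literal value assumed purely for illustration:

```go
package main

import "fmt"

// networkTransportGeneve stands in for types.NetworkTransportGeneve; the
// literal value is an assumption made for this example.
const networkTransportGeneve = "geneve"

// normalizedTransport is a hypothetical stand-in for
// userDefinedNetInfo.Transport(): an unset transport falls back to Geneve.
func normalizedTransport(raw string) string {
	if raw == "" {
		return networkTransportGeneve
	}
	return raw
}

func main() {
	// Comparing raw fields would report a spurious change ("" != "geneve");
	// comparing normalized values keeps the two configs compatible.
	fmt.Println("" == "geneve")                                            // false
	fmt.Println(normalizedTransport("") == normalizedTransport("geneve")) // true
}
```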
controller)", inputNetConf: &ovncnitypes.NetConf{ - NetConf: cnitypes.NetConf{Name: "nooverlay-network"}, + NetConf: cnitypes.NetConf{Name: "evpn-with-vids"}, Topology: ovntypes.Layer2Topology, - Transport: "nooverlay", + Transport: "evpn", + EVPN: &ovncnitypes.EVPNConfig{ + VTEP: "vid-vtep", + MACVRF: &ovncnitypes.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + VID: 12, + }, + IPVRF: &ovncnitypes.VRFConfig{ + VNI: 1000, + RouteTarget: "65000:1000", + VID: 13, + }, + }, }, - expectedTransport: "nooverlay", - expectedVTEPName: "", - expectedMACVRFVNI: 0, - expectedMACVRFRouteTarget: "", - expectedIPVRFVNI: 0, - expectedIPVRFRouteTarget: "", + expectedTransport: "evpn", + expectedVTEPName: "vid-vtep", + expectedMACVRFVNI: 100, + expectedMACVRFRouteTarget: "65000:100", + expectedMACVRFVID: 12, + expectedIPVRFVNI: 1000, + expectedIPVRFRouteTarget: "65000:1000", + expectedIPVRFVID: 13, }, { desc: "EVPN config with VNI only (no route target)", @@ -2149,8 +2173,10 @@ func TestEVPNConfig(t *testing.T) { g.Expect(netInfo.EVPNVTEPName()).To(gomega.Equal(test.expectedVTEPName), "VTEP name mismatch") g.Expect(netInfo.EVPNMACVRFVNI()).To(gomega.Equal(test.expectedMACVRFVNI), "MAC-VRF VNI mismatch") g.Expect(netInfo.EVPNMACVRFRouteTarget()).To(gomega.Equal(test.expectedMACVRFRouteTarget), "MAC-VRF RouteTarget mismatch") + g.Expect(netInfo.EVPNMACVRFVID()).To(gomega.Equal(test.expectedMACVRFVID), "MAC-VRF VID mismatch") g.Expect(netInfo.EVPNIPVRFVNI()).To(gomega.Equal(test.expectedIPVRFVNI), "IP-VRF VNI mismatch") g.Expect(netInfo.EVPNIPVRFRouteTarget()).To(gomega.Equal(test.expectedIPVRFRouteTarget), "IP-VRF RouteTarget mismatch") + g.Expect(netInfo.EVPNIPVRFVID()).To(gomega.Equal(test.expectedIPVRFVID), "IP-VRF VID mismatch") }) } } @@ -2173,7 +2199,7 @@ func TestEVPNNetworkCompatibility(t *testing.T) { { desc: "different transport should not be compatible", aNetwork: &userDefinedNetInfo{transport: "evpn"}, - anotherNetwork: &userDefinedNetInfo{transport: "nooverlay"}, + anotherNetwork: &userDefinedNetInfo{transport: "no-overlay"}, expectedResult: false, expectationDescription: "networks with different transport should not be compatible", }, diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 6016a946b5..e4628b36d2 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -7,6 +7,7 @@ import ( "fmt" "math/big" "net" + "slices" "strconv" "strings" @@ -329,12 +330,54 @@ func GenerateRandMAC() (net.HardwareAddr, error) { func CopyIPNets(ipnets []*net.IPNet) []*net.IPNet { copy := make([]*net.IPNet, len(ipnets)) for i := range ipnets { - ipnet := *ipnets[i] - copy[i] = &ipnet + if ipnets[i] == nil { + continue + } + copy[i] = &net.IPNet{ + IP: slices.Clone(ipnets[i].IP), + Mask: slices.Clone(ipnets[i].Mask), + } } return copy } +func isIPNetEqual(ipn1, ipn2 *net.IPNet) bool { + if ipn1 == ipn2 { + return true + } + if ipn1 == nil || ipn2 == nil { + return false + } + m1, _ := ipn1.Mask.Size() + m2, _ := ipn2.Mask.Size() + return m1 == m2 && ipn1.IP.Equal(ipn2.IP) +} + +// IsIPNetsEqual returns true if both IPNet slices are equal in length and values, regardless of order. 
diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go
index c11bc53e13..7f9af894a1 100644
--- a/go-controller/pkg/util/node_annotations.go
+++ b/go-controller/pkg/util/node_annotations.go
@@ -117,22 +117,6 @@ const (
 	// ovnkube-node gets the node's zone from the OVN Southbound database.
 	OvnNodeZoneName = "k8s.ovn.org/zone-name"
 
-	/** HACK BEGIN **/
-	// TODO(tssurya): Remove this annotation a few months from now (when one or two release jump
-	// upgrades are done). This has been added only to minimize disruption for upgrades when
-	// moving to interconnect=true.
-	// We want the legacy ovnkube-master to wait for remote ovnkube-node to
-	// signal it using "k8s.ovn.org/remote-zone-migrated" annotation before
-	// considering a node as remote when we upgrade from "global" (1 zone IC)
-	// zone to multi-zone. This is so that network disruption for the existing workloads
-	// is negligible and until the point where ovnkube-node flips the switch to connect
-	// to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure
-	// OVN/OVS flows are intact.
-	// OvnNodeMigratedZoneName is the zone to which the node belongs to. It is set by ovnkube-node.
-	// ovnkube-node gets the node's zone from the OVN Southbound database.
-	OvnNodeMigratedZoneName = "k8s.ovn.org/remote-zone-migrated"
-	/** HACK END **/
-
 	// OvnTransitSwitchPortAddr is the annotation to store the node Transit switch port ips.
 	// It is set by cluster manager.
 	OvnTransitSwitchPortAddr = "k8s.ovn.org/node-transit-switch-port-ifaddr"
@@ -1164,26 +1148,6 @@ func SetNodeZone(nodeAnnotator kube.Annotator, zoneName string) error {
 	return nodeAnnotator.Set(OvnNodeZoneName, zoneName)
 }
 
-/** HACK BEGIN **/
-// TODO(tssurya): Remove this a few months from now
-// SetNodeZoneMigrated sets the node's zone in the 'ovnNodeMigratedZoneName' node annotation.
-func SetNodeZoneMigrated(nodeAnnotator kube.Annotator, zoneName string) error {
-	return nodeAnnotator.Set(OvnNodeMigratedZoneName, zoneName)
-}
-
-// HasNodeMigratedZone returns true if node has its ovnNodeMigratedZoneName set already
-func HasNodeMigratedZone(node *corev1.Node) bool {
-	_, ok := node.Annotations[OvnNodeMigratedZoneName]
-	return ok
-}
-
-// NodeMigratedZoneAnnotationChanged returns true if the ovnNodeMigratedZoneName annotation changed for the node
-func NodeMigratedZoneAnnotationChanged(oldNode, newNode *corev1.Node) bool {
-	return oldNode.Annotations[OvnNodeMigratedZoneName] != newNode.Annotations[OvnNodeMigratedZoneName]
-}
-
-/** HACK END **/
-
 // GetNodeZone returns the zone of the node set in the 'ovnNodeZoneName' node annotation.
 // If the annotation is not set, it returns the 'default' zone name.
 func GetNodeZone(node *corev1.Node) string {
diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go
index b32f73999b..3c301c202d 100644
--- a/go-controller/pkg/util/ovs.go
+++ b/go-controller/pkg/util/ovs.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
+	"io"
 	"net"
 	"path/filepath"
 	"regexp"
@@ -669,11 +670,69 @@ func AddOFFlowWithSpecificAction(bridgeName, action string) (string, string, err
 	return strings.Trim(stdout.String(), "\" \n"), stderr.String(), err
 }
 
+// openFlowStdinReader incrementally renders a flow slice as a newline-delimited
+// stream for ovs-ofctl stdin without constructing one large joined string.
+type openFlowStdinReader struct {
+	flows      []string
+	flowIndex  int
+	flowOffset int
+	needEOL    bool
+}
+
+// Read implements io.Reader over r.flows, producing output equivalent to
+// strings.Join(flows, "\n"), but in small chunks to reduce peak allocations.
+func (r *openFlowStdinReader) Read(p []byte) (int, error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+	// Fast path: no flows left and no pending delimiter.
+	if r.flowIndex >= len(r.flows) && !r.needEOL {
+		return 0, io.EOF
+	}
+
+	total := 0
+	for total < len(p) {
+		if r.needEOL {
+			// Emit exactly one '\n' between flows.
+			p[total] = '\n'
+			total++
+			r.needEOL = false
+			if total == len(p) {
+				return total, nil
+			}
+			continue
+		}
+
+		if r.flowIndex >= len(r.flows) {
+			break
+		}
+
+		flow := r.flows[r.flowIndex]
+		if r.flowOffset >= len(flow) {
+			// Current flow was fully consumed; advance and schedule delimiter if
+			// there is another flow.
+			r.flowIndex++
+			r.flowOffset = 0
+			r.needEOL = r.flowIndex < len(r.flows)
+			continue
+		}
+
+		// Copy as much of the current flow as fits in the caller's buffer.
+		copied := copy(p[total:], flow[r.flowOffset:])
+		total += copied
+		r.flowOffset += copied
+	}
+
+	if total == 0 {
+		return 0, io.EOF
+	}
+	return total, nil
+}
+
 // ReplaceOFFlows replaces flows in the bridge with a slice of flows
 func ReplaceOFFlows(bridgeName string, flows []string) (string, string, error) {
 	args := []string{"-O", "OpenFlow13", "--bundle", "replace-flows", bridgeName, "-"}
-	stdin := &bytes.Buffer{}
-	stdin.Write([]byte(strings.Join(flows, "\n")))
+	stdin := &openFlowStdinReader{flows: flows}
 	cmd := runner.exec.Command(runner.ofctlPath, args...)
 	cmd.SetStdin(stdin)
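The reader is a drop-in replacement for the joined buffer: however small the read buffer, the concatenation of the chunks equals strings.Join(flows, "\n"), with one delimiter between flows and none trailing. A sketch in the spirit of the TestOpenFlowStdinReader unit test added later in this patch, deliberately forcing many tiny reads:

```go
package util

import (
	"io"
	"strings"
	"testing"
)

// Hypothetical illustration: draining through a 3-byte buffer still yields
// exactly the output the old bytes.Buffer approach produced.
func TestOpenFlowStdinReaderChunked(t *testing.T) {
	flows := []string{"table=0,actions=NORMAL", "table=1,actions=drop"}
	r := &openFlowStdinReader{flows: flows}

	var sb strings.Builder
	buf := make([]byte, 3) // tiny buffer to exercise the chunking paths
	for {
		n, err := r.Read(buf)
		sb.Write(buf[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
	}
	if got, want := sb.String(), strings.Join(flows, "\n"); got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}
```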
diff --git a/go-controller/pkg/util/ovs_benchmark_test.go b/go-controller/pkg/util/ovs_benchmark_test.go
new file mode 100644
index 0000000000..3fb8a7b514
--- /dev/null
+++ b/go-controller/pkg/util/ovs_benchmark_test.go
@@ -0,0 +1,82 @@
+package util
+
+import (
+	"bytes"
+	"io"
+	"strings"
+	"testing"
+)
+
+var benchmarkFlowBytesSink int64
+var benchmarkFlowCountSink int
+
+func BenchmarkReplaceOFFlowsInputRendering(b *testing.B) {
+	benchCases := []struct {
+		name      string
+		flowCount int
+	}{
+		{
+			name:      "1k_flows",
+			flowCount: 1000,
+		},
+		{
+			name:      "5k_flows",
+			flowCount: 5000,
+		},
+	}
+
+	for _, tc := range benchCases {
+		flows := makeBenchmarkFlows(tc.flowCount)
+		totalBytes := benchmarkFlowsBytes(flows)
+
+		b.Run(tc.name+"/join_buffer", func(b *testing.B) {
+			b.ReportAllocs()
+			b.SetBytes(totalBytes)
+			for i := 0; i < b.N; i++ {
+				stdin := &bytes.Buffer{}
+				stdin.Write([]byte(strings.Join(flows, "\n")))
+				written, err := io.Copy(io.Discard, stdin)
+				if err != nil {
+					b.Fatalf("failed to drain old flow payload: %v", err)
+				}
+				benchmarkFlowBytesSink = written
+				benchmarkFlowCountSink = stdin.Len()
+			}
+		})
+
+		b.Run(tc.name+"/stream_reader", func(b *testing.B) {
+			b.ReportAllocs()
+			b.SetBytes(totalBytes)
+			for i := 0; i < b.N; i++ {
+				stdin := &openFlowStdinReader{flows: flows}
+				written, err := io.Copy(io.Discard, stdin)
+				if err != nil {
+					b.Fatalf("failed to drain streaming flow payload: %v", err)
+				}
+				benchmarkFlowBytesSink = written
+				benchmarkFlowCountSink = len(flows)
+			}
+		})
+	}
+}
+
+func makeBenchmarkFlows(flowCount int) []string {
+	flows := make([]string, flowCount)
+	// Keep each flow moderately long to emulate real replace-flows payload size.
+	const flowSuffix = ",ip,nw_src=10.128.0.0/14,tp_dst=8080,actions=ct(commit),output:2"
+	for i := 0; i < flowCount; i++ {
+		flows[i] = "table=0,priority=100,in_port=1,reg0=0x1" + flowSuffix
+	}
+	return flows
+}
+
+func benchmarkFlowsBytes(flows []string) int64 {
+	if len(flows) == 0 {
+		return 0
+	}
+	total := len(flows) - 1
+	for _, flow := range flows {
+		total += len(flow)
+	}
+	return int64(total)
+}
diff --git a/go-controller/pkg/util/ovs_unit_test.go b/go-controller/pkg/util/ovs_unit_test.go
index 2b8e633949..b832c89af2 100644
--- a/go-controller/pkg/util/ovs_unit_test.go
+++ b/go-controller/pkg/util/ovs_unit_test.go
@@ -3,7 +3,9 @@ package util
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"os"
+	"strings"
 	"testing"
 	"time"
 
@@ -1695,14 +1697,14 @@ func TestReplaceOFFlows(t *testing.T) {
 			expectedErr: fmt.Errorf("failed to execute ovs-ofctl command"),
 			onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{nil, nil, fmt.Errorf("failed to execute ovs-ofctl command")}},
 			onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{mockCmd}},
-			onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*bytes.Buffer"}},
+			onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*util.openFlowStdinReader"}},
 		},
 		{
 			desc:        "positive: run `ovs-ofctl` command",
 			expectedErr: nil,
 			onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{bytes.NewBuffer([]byte("testblah")), bytes.NewBuffer([]byte("")), nil}},
 			onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{mockCmd}},
-			onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*bytes.Buffer"}},
+			onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*util.openFlowStdinReader"}},
 		},
 	}
 	for i, tc := range tests {
@@ -1722,6 +1724,44 @@ func TestReplaceOFFlows(t *testing.T) {
 	}
 }
 
+func TestOpenFlowStdinReader(t *testing.T) {
+	tests := []struct {
+		desc  string
+		flows []string
+	}{
+		{
+			desc:  "empty flow list",
+			flows: []string{},
+		},
+		{
+			desc:  "single flow",
+			flows: []string{"table=0,priority=0,actions=NORMAL"},
+		},
+		{
+			desc:  "multiple flows",
+			flows: []string{"a", "b", "c"},
+		},
+		{
+			desc:  "includes empty flow",
+			flows: []string{"a", "", "c"},
+		},
+	}
+
+	for i, tc := range tests {
+		t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) {
+			r := &openFlowStdinReader{flows: tc.flows}
+			out, err := io.ReadAll(r)
+			require.NoError(t, err)
+			assert.Equal(t, strings.Join(tc.flows, "\n"), string(out))
+
+			buf := make([]byte, 1)
+			n, eof := r.Read(buf)
+			assert.Equal(t, 0, n)
+			assert.Equal(t, io.EOF, eof)
+		})
+	}
+}
+
 func TestGetOVNDBServerInfo(t *testing.T) {
 	mockKexecIface := new(mock_k8s_io_utils_exec.Interface)
 	mockExecRunner := new(mocks.ExecRunner)
diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go
index 76a8833f2a..266d05aaaf 100644
--- a/go-controller/pkg/util/util.go
+++ b/go-controller/pkg/util/util.go
@@ -366,25 +366,7 @@ func IsClusterIP(svcVIP string) bool {
 	return false
 }
 
-type UnprocessedActiveNetworkError struct {
-	namespace string
-	udnName   string
-}
-
-func (m *UnprocessedActiveNetworkError) Error() string {
-	return fmt.Sprintf("primary UDN %q exists in namespace %s, but NAD has not been processed yet",
-		m.udnName, m.namespace)
-}
-
-func IsUnprocessedActiveNetworkError(err error) bool {
-	var unprocessedActiveNetworkError *UnprocessedActiveNetworkError
-	return errors.As(err, &unprocessedActiveNetworkError)
-}
-
-func NewUnprocessedActiveNetworkError(namespace, udnName string) *UnprocessedActiveNetworkError {
-	return &UnprocessedActiveNetworkError{namespace: namespace, udnName: udnName}
-}
-
+// InvalidPrimaryNetworkError indicates that the namespace requires a primary UDN, but no primary UDN exists yet
 type InvalidPrimaryNetworkError struct {
 	namespace string
 }
diff --git a/helm/ovn-kubernetes/Chart.yaml b/helm/ovn-kubernetes/Chart.yaml
index 4e6ef123ca..9407621aff 100644
--- a/helm/ovn-kubernetes/Chart.yaml
+++ b/helm/ovn-kubernetes/Chart.yaml
@@ -60,3 +60,7 @@ dependencies:
     version: 1.2.0
     tags:
       - ovs-node
+  - name: ovnkube-single-node-zone-dpu
+    version: 1.2.0
+    tags:
+      - ovnkube-single-node-zone-dpu
diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml
index fd9899c32b..90efbecad1 100644
--- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml
+++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml
@@ -66,6 +66,9 @@ spec:
         - mountPath: /ovn-cert
           name: host-ovn-cert
          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
       resources:
         requests:
           cpu: 100m
@@ -130,6 +133,10 @@ spec:
         value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }}
       - name: OVN_NETWORK_SEGMENTATION_ENABLE
         value: {{ default "" .Values.global.enableNetworkSegmentation | quote }}
+      - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE
+        value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }}
+      - name: OVN_EVPN_ENABLE
+        value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }}
       - name: OVN_NETWORK_CONNECT_ENABLE
         value: {{ default "" .Values.global.enableNetworkConnect | quote }}
       - name: OVN_PRE_CONF_UDN_ADDR_ENABLE
@@ -140,6 +147,8 @@ spec:
         value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }}
       - name: OVN_ADVERTISED_UDN_ISOLATION_MODE
         value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }}
+      - name: OVN_NO_OVERLAY_ENABLE
+        value: {{ default "false" .Values.global.enableNoOverlay | quote }}
       - name: OVN_HYBRID_OVERLAY_NET_CIDR
         value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }}
       - name: OVN_DISABLE_SNAT_MULTIPLE_GWS
@@ -177,6 +186,8 @@ spec:
         value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }}
       - name: OVN_ENABLE_DNSNAMERESOLVER
         value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }}
+      - name: OVN_ALLOW_ICMP_NETPOL
+        value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }}
       # end of container
       volumes:
       # TODO: Need to check why we need this?
@@ -196,5 +207,8 @@ spec:
         hostPath:
           path: /etc/ovn
           type: DirectoryOrCreate
+      - name: ovnkube-config
+        configMap:
+          name: ovnkube-config
       tolerations:
       - operator: "Exists"
diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml
index 4a62d3e661..45c801fa92 100644
--- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml
+++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml
@@ -76,6 +76,8 @@ rules:
       - networkqoses
       - userdefinednetworks
       - clusteruserdefinednetworks
+      - routeadvertisements
+      - vteps
     verbs: [ "get", "list", "watch" ]
   - apiGroups: ["k8s.ovn.org"]
     resources:
@@ -87,6 +89,7 @@ rules:
       - clusteruserdefinednetworks
      - clusteruserdefinednetworks/status
      - clusteruserdefinednetworks/finalizers
+      - routeadvertisements/status
     verbs: [ "patch", "update" ]
   - apiGroups: [""]
     resources:
@@ -127,3 +130,9 @@ rules:
       - dnsnameresolvers
     verbs: [ "create", "delete", "list", "patch", "update", "watch" ]
 {{- end }}
+  {{- if eq (hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false) true }}
+  - apiGroups: ["frrk8s.metallb.io"]
+    resources:
+      - frrconfigurations
+    verbs: [ "create", "delete", "get", "list", "patch", "update", "watch" ]
+  {{- end }}
hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_DISABLE_REQUESTEDCHASSIS value: {{ default "false" .Values.global.disableRequestedchassis | quote }} # end of container @@ -316,6 +330,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config {{- if eq (hasKey .Values.global "enableCompactMode" | ternary .Values.global.enableCompactMode false) true }} - name: host-slash hostPath: diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml index 7474c69f8f..cb884a9bea 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml @@ -87,6 +87,7 @@ rules: - userdefinednetworks - clusteruserdefinednetworks - networkqoses + - vteps verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.cni.cncf.io"] resources: diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml index a7a3ebc4c9..f009f7036f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml @@ -97,10 +97,25 @@ spec: # ovnkube-node dpu-host mounts - mountPath: /var/run/ovn name: var-run-ovn + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + {{- if .Values.global.enableNetworkSegmentation }} + - mountPath: /var/run/k8s.cni.cncf.io/devinfo/dp + name: host-devinfo-dp + readOnly: true + {{- end }} resources: requests: cpu: 100m memory: 300Mi + {{- if and (.Values.global.enableNetworkSegmentation) (.Values.mgmtPortVFResourceName) (.Values.mgmtPortVFsCount) }} + {{ .Values.mgmtPortVFResourceName }}: {{ .Values.mgmtPortVFsCount }} + {{- end }} + limits: + {{- if and (.Values.global.enableNetworkSegmentation) (.Values.mgmtPortVFResourceName) (.Values.mgmtPortVFsCount) }} + {{ .Values.mgmtPortVFResourceName }}: {{ .Values.mgmtPortVFsCount }} + {{- end }} env: {{ if .Values.global.enableCoredumps -}} - name: GOTRACEBACK @@ -210,8 +225,16 @@ spec: value: {{ hasKey .Values.global "enableDynamicUDNAllocation" | ternary .Values.global.enableDynamicUDNAllocation false | quote }} - name: OVN_DYNAMIC_UDN_GRACE_PERIOD value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVNKUBE_NODE_MGMT_PORT_NETDEV - value: {{ default "" .Values.global.nodeMgmtPortNetdev | quote }} + value: {{ default "" .Values.nodeMgmtPortNetdev | quote }} + - name: OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME + value: {{ default "" .Values.mgmtPortVFResourceName | quote }} + - name: OVN_ENABLE_INTERCONNECT + value: {{ default "false" .Values.global.enableInterconnect | quote }} + - name: OVN_NETWORK_SEGMENTATION_ENABLE + value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} - name: OVN_HOST_NETWORK_NAMESPACE 
valueFrom: configMapKeyRef: @@ -263,5 +286,13 @@ spec: path: /run/systemd - name: var-run-ovn emptyDir: {} + - name: ovnkube-config + configMap: + name: ovnkube-config + {{- if .Values.global.enableNetworkSegmentation }} + - name: host-devinfo-dp + hostPath: + path: /var/run/k8s.cni.cncf.io/devinfo/dp + {{- end }} tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml index 584127efca..914b06593e 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml @@ -2,4 +2,18 @@ logLevel: 4 logFileMaxSize: 100 logFileMaxBackups: 5 logFileMaxAge: 5 -ovnControllerLogLevel: 4 \ No newline at end of file +ovnControllerLogLevel: 4 + +# The netdevice or deviceplugin resourcename that specifies pool of devices +# that can be used for management ports has to be specified. +# mgmtPortVFResourceName will override nodeMgmtPortNetdev if both are specified + +# The net device to be used for management port +nodeMgmtPortNetdev: "" + +# The device plugin resource name that has allocated interfaces to be used for management ports +mgmtPortVFResourceName: "" + +# If using UDNs, the number of VFs required to handle management ports, which depends on +# the number of primary UDNs required should be specified. +mgmtPortVFsCount: 1 diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml index dd961de8e7..b1039f6d8f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml @@ -108,6 +108,9 @@ spec: name: run-systemd subPath: private readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -241,6 +244,8 @@ spec: value: {{ hasKey .Values.global "enableDynamicUDNAllocation" | ternary .Values.global.enableDynamicUDNAllocation false | quote }} - name: OVN_DYNAMIC_UDN_GRACE_PERIOD value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: @@ -397,5 +402,8 @@ spec: - name: run-systemd hostPath: path: /run/systemd + - name: ovnkube-config + configMap: + name: ovnkube-config tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml index 659bfa2bab..3a0b5162e4 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml @@ -98,6 +98,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /etc/openvswitch/ name: host-etc-ovs readOnly: true @@ -231,6 +234,10 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - 
diff --git a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml
index 659bfa2bab..3a0b5162e4 100644
--- a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml
+++ b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml
@@ -98,6 +98,9 @@ spec:
         - mountPath: /ovn-cert
           name: host-ovn-cert
           readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
         - mountPath: /etc/openvswitch/
           name: host-etc-ovs
           readOnly: true
@@ -231,6 +234,10 @@ spec:
        value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }}
       - name: OVN_NETWORK_SEGMENTATION_ENABLE
         value: {{ default "" .Values.global.enableNetworkSegmentation | quote }}
+      - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE
+        value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }}
+      - name: OVN_EVPN_ENABLE
+        value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }}
       - name: OVN_NETWORK_CONNECT_ENABLE
         value: {{ default "" .Values.global.enableNetworkConnect | quote }}
       - name: OVN_PRE_CONF_UDN_ADDR_ENABLE
@@ -241,6 +248,8 @@ spec:
         value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }}
       - name: OVN_ADVERTISED_UDN_ISOLATION_MODE
         value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }}
+      - name: OVN_NO_OVERLAY_ENABLE
+        value: {{ default "false" .Values.global.enableNoOverlay | quote }}
       - name: OVN_ENABLE_INTERCONNECT
         value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }}
       - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY
@@ -411,5 +420,8 @@ spec:
       - name: run-systemd
         hostPath:
           path: /run/systemd
+      - name: ovnkube-config
+        configMap:
+          name: ovnkube-config
       tolerations:
       - operator: "Exists"
diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml
new file mode 100644
index 0000000000..a956168562
--- /dev/null
+++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: ovnkube-single-node-zone-dpu
+description: Helm chart to deploy single node zone stack on DPUs
+type: application
+version: 1.2.0
+appVersion: "1.2.0"
diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml
new file mode 100644
index 0000000000..ac78168e4f
--- /dev/null
+++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml
@@ -0,0 +1,575 @@
+# ovnkube-node-dpu
+# daemonset version 3
+# starts node daemons for single node zone ovn stack, each in a separate container on DPU
+kind: DaemonSet
+apiVersion: apps/v1
+metadata:
+  name: ovnkube-node-dpu
+  # namespace set up by install
+  namespace: ovn-kubernetes
+  annotations:
+    kubernetes.io/description: |
+      This DaemonSet launches the ovn-kubernetes networking components on DPUs in IC mode.
+spec:
+  selector:
+    matchLabels:
+      app: ovnkube-node-dpu
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: ovnkube-node-dpu
+        name: ovnkube-node-dpu
+        component: network
+        type: infra
+        kubernetes.io/os: "linux"
+        ovn-db-pod: "true"
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ''
+    spec:
+      {{- if .Values.global.imagePullSecretName }}
+      imagePullSecrets:
+      - name: {{ .Values.global.imagePullSecretName }}
+      {{- end }}
+      serviceAccountName: ovnkube-node
+      hostNetwork: true
+      dnsPolicy: Default
+      {{- if eq (hasKey .Values.global "unprivilegedMode" | ternary .Values.global.unprivilegedMode false) false }}
+      hostPID: true
+      {{- end }}
+      containers:
+      # nb-ovsdb - v3
+      - name: nb-ovsdb
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "local-nb-ovsdb"]
+        securityContext:
+          runAsUser: 0
+          capabilities:
+            add: ["NET_ADMIN"]
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        # ovn db is stored in the pod in /etc/openvswitch
+        # (or in /etc/ovn if OVN from new repository is used)
+        # and on the host in /var/lib/openvswitch/
+        - mountPath: /etc/openvswitch/
+          name: host-etc-ovs
+        - mountPath: /etc/ovn/
+          name: host-var-lib-ovs
+        - mountPath: /var/log/openvswitch/
+          name: host-var-log-ovs
+        - mountPath: /var/log/ovn/
+          name: host-var-log-ovs
+        - mountPath: /ovn-cert
+          name: host-ovn-cert
+          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
+        - mountPath: /var/run/ovn/
+          name: host-var-run-ovs
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_LOGLEVEL_NB
+          value: {{ default "-vconsole:info -vfile:info" .Values.nbLogLevel | quote }}
+        - name: OVN_NORTHD_BACKOFF_INTERVAL
+          value: {{ default "0" .Values.northdBackoffInterval | quote }}
+        - name: OVN_KUBERNETES_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: K8S_NODE_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnnb-db"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+      # end of nb-ovsdb container
+      # sb-ovsdb - v3
+      - name: sb-ovsdb
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "local-sb-ovsdb"]
+        securityContext:
+          runAsUser: 0
+          capabilities:
+            add: ["NET_ADMIN"]
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        # ovn db is stored in the pod in /etc/openvswitch
+        # (or in /etc/ovn if OVN from new repository is used)
+        # and on the host in /var/lib/openvswitch/
+        - mountPath: /etc/openvswitch/
+          name: host-etc-ovs
+        - mountPath: /etc/ovn/
+          name: host-var-lib-ovs
+        - mountPath: /var/log/openvswitch/
+          name: host-var-log-ovs
+        - mountPath: /var/log/ovn/
+          name: host-var-log-ovs
+        - mountPath: /ovn-cert
+          name: host-ovn-cert
+          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
+        - mountPath: /var/run/ovn/
+          name: host-var-run-ovs
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_LOGLEVEL_SB
+          value: {{ default "-vconsole:info -vfile:info" .Values.sbLogLevel | quote }}
+        - name: OVN_KUBERNETES_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: K8S_NODE_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
+        - name: OVN_SSL_ENABLE
+          value: {{ include "isSslEnabled" . | quote }}
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnsb-db"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+      # end of sb-ovsdb container
+      # ovn-northd - v3
+      - name: ovn-northd
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "run-ovn-northd"]
+        securityContext:
+          runAsUser: 0
+          capabilities:
+            add: ["SYS_NICE"]
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        # Run directories where we need to be able to access sockets
+        - mountPath: /var/run/dbus/
+          name: host-var-run-dbus
+          readOnly: true
+        - mountPath: /var/log/openvswitch/
+          name: host-var-log-ovs
+        - mountPath: /var/log/ovn/
+          name: host-var-log-ovs
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+        - mountPath: /var/run/ovn/
+          name: host-var-run-ovs
+        - mountPath: /ovn-cert
+          name: host-ovn-cert
+          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_LOGLEVEL_NORTHD
+          value: {{ default "-vconsole:info -vfile:info" .Values.northdLogLevel | quote }}
+        - name: OVN_SSL_ENABLE
+          value: {{ include "isSslEnabled" . | quote }}
+        - name: OVN_NORTH
+          value: "local"
+        - name: OVN_SOUTH
+          value: "local"
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-northd"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+      # end of ovn-northd container
+      # ovnkube-controller
+      - name: ovnkube-controller
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "ovnkube-controller-with-node"]
+        securityContext:
+          runAsUser: 0
+          {{- if eq (hasKey .Values.global "unprivilegedMode" | ternary .Values.global.unprivilegedMode false) false }}
+          privileged: true
+          {{- else }}
+          capabilities:
+            add:
+            - NET_ADMIN
+          {{- end }}
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        # Common mounts
+        # for the iptables wrapper
+        - mountPath: /host
+          name: host-slash
+          readOnly: true
+        - mountPath: /var/lib/kubelet
+          name: host-kubelet
+          readOnly: true
+        - mountPath: /host-kubernetes
+          name: host-kubeconfig
+          readOnly: true
+        - mountPath: /var/run/dbus/
+          name: host-var-run-dbus
+          readOnly: true
+        - mountPath: /var/log/ovn-kubernetes/
+          name: host-var-log-ovnkube
+        # We mount our socket here
+        - mountPath: /var/run/ovn-kubernetes
+          name: host-var-run-ovn-kubernetes
+        # CNI related mounts which we take over
+        - mountPath: /opt/cni/bin
+          name: host-opt-cni-bin
+        - mountPath: /etc/cni/net.d
+          name: host-etc-cni-netd
+        - mountPath: /var/run/netns
+          name: host-netns
+          mountPropagation: Bidirectional
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+        - mountPath: /var/run/ovn/
+          name: host-var-run-ovs
+        - mountPath: /ovn-cert
+          name: host-ovn-cert
+          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
+        - mountPath: /etc/openvswitch/
+          name: host-etc-ovs
+          readOnly: true
+        - mountPath: /etc/ovn/
+          name: host-var-lib-ovs
+          readOnly: true
+        - mountPath: /run/systemd/private
+          name: run-systemd
+          subPath: private
+          readOnly: true
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_EGRESSSERVICE_ENABLE
+          value: {{ default "" .Values.global.enableEgressService | quote }}
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_LOGLEVEL
+          value: {{ default 4 .Values.ovnkubeNodeLogLevel | quote }}
+        - name: OVNKUBE_LOGFILE_MAXSIZE
+          value: {{ default 100 .Values.logFileMaxSize | quote }}
+        - name: OVNKUBE_LOGFILE_MAXBACKUPS
+          value: {{ default 5 .Values.logFileMaxBackups | quote }}
+        - name: OVNKUBE_LOGFILE_MAXAGE
+          value: {{ default 5 .Values.logFileMaxAge | quote }}
+        - name: OVNKUBE_LIBOVSDB_CLIENT_LOGFILE
+          value: {{ default "" .Values.libovsdbClientLogFile | quote }}
+        - name: OVNKUBE_CONFIG_DURATION_ENABLE
+          value: {{ default "" .Values.global.enableConfigDuration | quote }}
+        - name: OVNKUBE_METRICS_SCALE_ENABLE
+          value: {{ default "" .Values.global.enableMetricsScale | quote }}
+        - name: OVN_NET_CIDR
+          value: {{ default "" .Values.global.dpuHostClusterNetworkCIDR | quote }}
+        - name: OVN_SVC_CIDR
+          value: {{ default "" .Values.global.dpuHostClusterServiceCIDR | quote }}
+        - name: K8S_APISERVER
+          value: {{ default "" .Values.global.dpuHostClusterK8sAPIServer | quote }}
+        - name: K8S_TOKEN
+          value: {{ default "" .Values.global.dpuHostClusterK8sToken | quote }}
+        - name: K8S_CACERT_DATA
+          value: {{ default "" .Values.global.dpuHostClusterK8sCACertData | quote }}
+        - name: K8S_TOKEN_FILE
+          value: {{ default "" .Values.global.dpuHostClusterK8sTokenFile | quote }}
+        - name: K8S_CACERT
+          value: {{ default "" .Values.global.dpuHostClusterK8sCACert | quote }}
+        - name: OVN_MTU
+          value: {{ default "" .Values.global.mtu | quote }}
+        - name: OVN_GATEWAY_MODE
+          value: {{ default "shared" .Values.global.gatewayMode }}
+        - name: OVN_GATEWAY_OPTS
+          value: {{ default "" .Values.global.gatewayOpts | quote }}
+        - name: OVN_HYBRID_OVERLAY_ENABLE
+          value: {{ default "" .Values.global.enableHybridOverlay | quote }}
+        - name: OVN_ADMIN_NETWORK_POLICY_ENABLE
+          value: {{ default "" .Values.global.enableAdminNetworkPolicy | quote }}
+        - name: OVN_EGRESSIP_ENABLE
+          value: {{ default "" .Values.global.enableEgressIp | quote }}
+        - name: OVN_EGRESSIP_HEALTHCHECK_PORT
+          value: {{ default "" .Values.global.egressIpHealthCheckPort | quote }}
+        - name: OVN_EGRESSFIREWALL_ENABLE
+          value: {{ default "" .Values.global.enableEgressFirewall | quote }}
+        - name: OVN_EGRESSQOS_ENABLE
+          value: {{ default "" .Values.global.enableEgressQos | quote }}
+        - name: OVN_HYBRID_OVERLAY_NET_CIDR
+          value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }}
+        - name: OVN_DISABLE_SNAT_MULTIPLE_GWS
+          value: {{ default "" .Values.global.disableSnatMultipleGws | quote }}
+        - name: OVN_DISABLE_FORWARDING
+          value: {{ default "" .Values.global.disableForwarding | quote }}
+        - name: OVN_ENCAP_PORT
+          value: {{ default 6081 .Values.global.encapPort | quote }}
+        - name: OVN_DISABLE_PKT_MTU_CHECK
+          value: {{ default "" .Values.global.disablePacketMtuCheck | quote }}
+        - name: OVN_NETFLOW_TARGETS
+          value: {{ default "" .Values.global.netFlowTargets | quote }}
+        - name: OVN_SFLOW_TARGETS
+          value: {{ default "" .Values.global.sflowTargets | quote }}
+        - name: OVN_IPFIX_TARGETS
+          value: {{ default "" .Values.global.ipfixTargets | quote }}
+        - name: OVN_IPFIX_SAMPLING
+          value: {{ default "" .Values.global.ipfixSampling | quote }}
+        - name: OVN_IPFIX_CACHE_MAX_FLOWS
+          value: {{ default "" .Values.global.ipfixCacheMaxFlows | quote }}
+        - name: OVN_IPFIX_CACHE_ACTIVE_TIMEOUT
+          value: {{ default "" .Values.global.ipfixCacheActiveTimeout | quote }}
+        - name: OVN_V4_JOIN_SUBNET
+          value: {{ default "" .Values.global.v4JoinSubnet | quote }}
+        - name: OVN_V6_JOIN_SUBNET
+          value: {{ default "" .Values.global.v6JoinSubnet | quote }}
+        - name: OVN_V4_MASQUERADE_SUBNET
+          value: {{ default "" .Values.global.v4MasqueradeSubnet | quote }}
+        - name: OVN_V6_MASQUERADE_SUBNET
+          value: {{ default "" .Values.global.v6MasqueradeSubnet | quote }}
+        - name: OVN_MULTICAST_ENABLE
+          value: {{ default "" .Values.global.enableMulticast | quote }}
+        - name: OVN_UNPRIVILEGED_MODE
+          value: {{ include "isUnprivilegedMode" . | quote }}
+        - name: OVN_EX_GW_NETWORK_INTERFACE
+          value: {{ default "" .Values.global.extGatewayNetworkInterface | quote }}
+        - name: OVN_SSL_ENABLE
+          value: {{ include "isSslEnabled" . | quote }}
+        - name: OVN_REMOTE_PROBE_INTERVAL
+          value: {{ default 100000 .Values.global.remoteProbeInterval | quote }}
+        - name: OVN_MONITOR_ALL
+          value: {{ hasKey .Values.global "monitorAll" | ternary .Values.global.monitorAll true | quote }}
+        - name: OVN_OFCTRL_WAIT_BEFORE_CLEAR
+          value: {{ default "" .Values.global.ofctrlWaitBeforeClear | quote }}
+        - name: OVN_ENABLE_LFLOW_CACHE
+          value: {{ hasKey .Values.global "enableLFlowCache" | ternary .Values.global.enableLFlowCache true | quote }}
+        - name: OVN_LFLOW_CACHE_LIMIT
+          value: {{ default "" .Values.global.lFlowCacheLimit | quote }}
+        - name: OVN_LFLOW_CACHE_LIMIT_KB
+          value: {{ default "" .Values.global.lFlowCacheLimitKb | quote }}
+        - name: OVN_MULTI_NETWORK_ENABLE
+          value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }}
+        - name: OVN_NETWORK_SEGMENTATION_ENABLE
+          value: {{ default "" .Values.global.enableNetworkSegmentation | quote }}
+        - name: OVN_NETWORK_CONNECT_ENABLE
+          value: {{ default "" .Values.global.enableNetworkConnect | quote }}
+        - name: OVN_PRE_CONF_UDN_ADDR_ENABLE
+          value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }}
+        - name: OVN_ADVERTISED_UDN_ISOLATION_MODE
+          value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }}
+        - name: OVN_EMPTY_LB_EVENTS
+          value: {{ default "" .Values.global.emptyLbEvents | quote }}
+        - name: OVN_ACL_LOGGING_RATE_LIMIT
+          value: {{ default 20 .Values.global.aclLoggingRateLimit | quote }}
+        - name: OVN_NORTH
+          value: "local"
+        - name: OVN_SOUTH
+          value: "local"
+        - name: OVN_ENABLE_INTERCONNECT
+          value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }}
+        - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY
+          value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }}
+        - name: OVN_ENABLE_OVNKUBE_IDENTITY
+          value: {{ hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity false | quote }}
+        - name: OVN_ENABLE_SVC_TEMPLATE_SUPPORT
+          value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate false | quote }}
+        - name: OVN_ENABLE_DNSNAMERESOLVER
+          value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }}
+        - name: OVN_OBSERV_ENABLE
+          value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }}
+        - name: OVN_NETWORK_QOS_ENABLE
+          value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }}
+        - name: OVN_NO_OVERLAY_ENABLE
+          value: {{ default "false" .Values.global.enableNoOverlay | quote }}
+        - name: OVN_KUBERNETES_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: K8S_NODE_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
+        readinessProbe:
+          httpGet:
+            path: /metrics
+            port: {{ .Values.metricsPort }}
+            scheme: HTTP
+          initialDelaySeconds: 30
+          timeoutSeconds: 5
+          periodSeconds: 30
+      # end of ovnkube-controller container
+      # ovn-controller
+      - name: ovn-controller
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "ovn-controller"]
+        securityContext:
+          runAsUser: 0
+          capabilities:
+            add: ["SYS_NICE"]
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - mountPath: /var/run/dbus/
+          name: host-var-run-dbus
+          readOnly: true
+        - mountPath: /var/log/openvswitch/
+          name: host-var-log-ovs
+        - mountPath: /var/log/ovn/
+          name: host-var-log-ovs
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+        - mountPath: /var/run/ovn/
+          name: host-var-run-ovs
+        - mountPath: /ovn-cert
+          name: host-ovn-cert
+          readOnly: true
+        - mountPath: /run/ovnkube-config
+          name: ovnkube-config
+          readOnly: true
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_LOGLEVEL_CONTROLLER
+          value: {{ default "-vconsole:info" .Values.ovnControllerLogLevel | quote }}
+        - name: OVN_SSL_ENABLE
+          value: {{ default "" .Values.global.enableSsl | quote }}
+        - name: OVN_NORTH
+          value: "local"
+        - name: OVN_SOUTH
+          value: "local"
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+      # ovs-metrics-exporter - v3
+      - name: ovs-metrics-exporter
+        image: {{ include "getDPUImage" . }}
+        imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }}
+        command: ["/root/ovnkube.sh", "ovs-metrics"]
+        securityContext:
+          runAsUser: 0
+          capabilities:
+            add: ["NET_ADMIN"]
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - mountPath: /var/run/dbus/
+          name: host-var-run-dbus
+          readOnly: true
+        - mountPath: /var/log/openvswitch/
+          name: host-var-log-ovs
+        - mountPath: /var/run/openvswitch/
+          name: host-var-run-ovs
+          readOnly: true
+        resources:
+          requests:
+            cpu: 100m
+            memory: 300Mi
+        env:
+        - name: OVN_DAEMONSET_VERSION
+          value: "1.2.0"
+        - name: OVNKUBE_NODE_MODE
+          value: "dpu"
+        - name: OVN_NORTH
+          value: "local"
+        - name: OVN_SOUTH
+          value: "local"
+      # end of container
+      nodeSelector:
+        kubernetes.io/os: "linux"
+        k8s.ovn.org/dpu: ""
+      volumes:
+      # Common volumes
+      - name: host-var-run-dbus
+        hostPath:
+          path: /var/run/dbus
+      - name: host-kubelet
+        hostPath:
+          path: /var/lib/kubelet
+      - name: host-kubeconfig
+        hostPath:
+          path: /etc/kubernetes/
+      - name: host-var-log-ovnkube
+        hostPath:
+          path: /var/log/ovn-kubernetes
+      - name: host-var-run-ovn-kubernetes
+        hostPath:
+          path: /var/run/ovn-kubernetes
+      - name: host-opt-cni-bin
+        hostPath:
+          path: /opt/cni/bin
+      - name: host-etc-cni-netd
+        hostPath:
+          path: /etc/cni/net.d
+      - name: host-slash
+        hostPath:
+          path: /
+      - name: host-netns
+        hostPath:
+          path: /var/run/netns
+      - name: host-var-log-ovs
+        hostPath:
+          path: /var/log/openvswitch
+      - name: host-var-run-ovs
+        hostPath:
+          path: /var/run/openvswitch
+      - name: host-ovn-cert
+        hostPath:
+          path: /etc/ovn
+          type: DirectoryOrCreate
+      - name: host-etc-ovs
+        hostPath:
+          path: /etc/openvswitch
+      - name: host-var-lib-ovs
+        hostPath:
+          path: /var/lib/openvswitch
+      - name: run-systemd
+        hostPath:
+          path: /run/systemd
+      - name: ovnkube-config
+        configMap:
+          name: ovnkube-config
+      tolerations:
+      - operator: "Exists"
diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml
new file mode
100644 index 0000000000..b7d2004c60 --- /dev/null +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml @@ -0,0 +1,12 @@ +nbLogLevel: "-vconsole:info -vfile:info" +sbLogLevel: "-vconsole:info -vfile:info" +northdLogLevel: "-vconsole:info -vfile:info" +northdBackoffInterval: "0" +ovnkubeNodeLogLevel: 4 +ovnControllerLogLevel: "-vconsole:info" +logFileMaxSize: 100 +logFileMaxBackups: 5 +logFileMaxAge: 5 +libovsdbClientLogFile: "" +# -- TCP port serving metrics +metricsPort: 9476 diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index 001213d48a..19ffdf112d 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -64,6 +64,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -126,6 +129,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -191,6 +197,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -271,6 +280,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /etc/openvswitch/ name: host-etc-ovs readOnly: true @@ -419,6 +431,10 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE @@ -429,6 +445,8 @@ spec: value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVNKUBE_NODE_MGMT_PORT_NETDEV value: {{ default "" .Values.global.nodeMgmtPortNetdev | quote }} - name: OVN_EMPTY_LB_EVENTS @@ -454,6 +472,8 @@ spec: value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate true | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false 
| quote }} - name: OVN_NETWORK_QOS_ENABLE @@ -488,6 +508,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -562,6 +585,8 @@ spec: - matchExpressions: - key: k8s.ovn.org/dpu-host operator: DoesNotExist + - key: k8s.ovn.org/dpu + operator: DoesNotExist volumes: # Common volumes - name: host-var-run-dbus @@ -604,6 +629,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config - name: host-etc-ovs hostPath: path: /etc/openvswitch diff --git a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index 5dd4b4c231..e26b24a64b 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -69,6 +69,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -131,6 +134,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -196,6 +202,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -249,6 +258,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -317,12 +329,18 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_HYBRID_OVERLAY_NET_CIDR value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }} - name: OVN_DISABLE_SNAT_MULTIPLE_GWS @@ -370,6 +388,8 @@ spec: value: "local" - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }} # end of 
container @@ -409,6 +429,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config - name: host-var-lib-ovs hostPath: path: /var/lib/openvswitch diff --git a/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml b/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml index 1f33d980d7..18855d51e6 100644 --- a/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml +++ b/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml @@ -84,9 +84,6 @@ spec: requests: cpu: 100m memory: 300Mi - limits: - cpu: 500m - memory: 500Mi env: - name: OVN_DAEMONSET_VERSION value: "1.2.0" diff --git a/helm/ovn-kubernetes/templates/ovn-setup.yaml b/helm/ovn-kubernetes/templates/ovn-setup.yaml index e9a5ef8981..b76ebf0929 100644 --- a/helm/ovn-kubernetes/templates/ovn-setup.yaml +++ b/helm/ovn-kubernetes/templates/ovn-setup.yaml @@ -50,6 +50,26 @@ data: mtu: {{ .Values.mtu | default 1500 | quote }} host_network_namespace: {{ $hostNetworkNamespace }} +--- +# ovnkube-config ConfigMap +# +# Configuration for ovnkube binaries +kind: ConfigMap +apiVersion: v1 +metadata: + name: ovnkube-config + namespace: ovn-kubernetes +data: + ovnkube.conf: | +{{- if .Values.global.enableNoOverlay }} + [default] + transport = no-overlay + + [no-overlay] + outbound-snat = disabled + routing = unmanaged +{{- end }} + {{- if or .Values.global.skipCallToK8s (eq (include "needNamespace" $hostNetworkNamespace) "true") }} --- # ovn-host-network-namespace.yaml @@ -64,6 +84,23 @@ metadata: name: {{ $hostNetworkNamespace }} {{- end }} +{{- if and (eq (hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false) true) (eq (hasKey .Values.global "advertiseDefaultNetwork" | ternary .Values.global.advertiseDefaultNetwork false) true) }} +--- +apiVersion: k8s.ovn.org/v1 +kind: RouteAdvertisements +metadata: + name: default +spec: + networkSelectors: + - networkSelectionType: DefaultNetwork + nodeSelector: {} + frrConfigurationSelector: + matchLabels: + name: receive-all + advertisements: + - "PodNetwork" +{{- end }} + {{- if (and .Values.global.dockerConfigSecret .Values.global.dockerConfigSecret.create) }} --- apiVersion: v1 diff --git a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml index cf111e6482..a2bec63d7e 100644 --- a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml +++ b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml @@ -162,6 +162,7 @@ rules: - egressfirewalls/status - adminpolicybasedexternalroutes/status - egressqoses/status + - routeadvertisements/status - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: ["policy.networking.k8s.io"] @@ -184,6 +185,7 @@ rules: - adminpolicybasedexternalroutes - userdefinednetworks - clusteruserdefinednetworks + - routeadvertisements - networkqoses - clusternetworkconnects verbs: [ "get", "list", "watch" ] diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 77f1369af8..ae73d2827a 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -76,6 +76,16 @@ global: enableMultiNetwork: false # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes enableNetworkSegmentation: false + # -- Configure to use route advertisements feature with ovn-kubernetes + enableRouteAdvertisements: false + # -- Configure to use EVPN feature with ovn-kubernetes + enableEVPN: 
false + # -- Advertise default network on all nodes with a default RouteAdvertisements configuration + advertiseDefaultNetwork: false + # -- Pod network isolation between advertised UDN networks. (strict or loose) + advertisedUDNIsolationMode: "strict" + # -- Configure to enable no-overlay mode for the default network + enableNoOverlay: false # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes enablePreconfiguredUDNAddresses: false # -- Configure to enable IPsec @@ -106,6 +116,8 @@ global: lFlowCacheLimitKb: "" # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index 34bd024435..f366632023 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -8,6 +8,7 @@ tags: ovnkube-node-dpu: false ovnkube-node-dpu-host: false ovnkube-single-node-zone: false + ovnkube-single-node-zone-dpu: false ovnkube-zone-controller: false # -- Endpoint of Kubernetes api server @@ -70,6 +71,16 @@ global: enableMultiNetwork: false # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes enableNetworkSegmentation: false + # -- Configure to use route advertisements feature with ovn-kubernetes + enableRouteAdvertisements: false + # -- Configure to use EVPN feature with ovn-kubernetes + enableEVPN: false + # -- Advertise default network on all nodes with a default RouteAdvertisements configuration + advertiseDefaultNetwork: false + # -- Pod network isolation between advertised UDN networks. 
(strict or loose) + advertisedUDNIsolationMode: "strict" + # -- Configure to enable no-overlay mode for the default network + enableNoOverlay: false # -- Configure to enable IPsec enableIpsec: false # -- Use SSL transport to NB/SB db and northd @@ -93,6 +104,8 @@ global: enableLFlowCache: true # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Maximum number of logical flow cache entries ovn-controller may create when the logical flow cache is enabled # @default -- unlimited lFlowCacheLimit: "" @@ -183,4 +196,3 @@ monitoring: enableServiceMonitor: false # -- deploy PrometheusRules for specific metric collection using the Prometheus Operator enablePrometheusRule: false - diff --git a/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml b/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml new file mode 100644 index 0000000000..2105d1302b --- /dev/null +++ b/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml @@ -0,0 +1,176 @@ +# Values for ovn-kubernetes with single-node zone interconnect for DPU cluster +# Requires: ovnkube-single-node-zone-dpu only + +# -- The following subcharts should be disabled +tags: + ovs-node: false + ovn-ipsec: false + ovnkube-db: false + ovnkube-db-raft: false + ovnkube-master: false + ovnkube-node: false + ovnkube-control-plane: false + ovnkube-node-dpu-host: false + ovnkube-node-dpu: false + ovnkube-single-node-zone: false + ovnkube-zone-controller: false + +# -- Whether or not call `lookup` Helm function, set it to `true` if you want to run `helm dry-run/template/lint` +skipCallToK8s: false + +global: + # -- The interface on nodes that will be used for external gateway network traffic + extGatewayNetworkInterface: "" + # -- GENEVE UDP port (default 6081) + encapPort: 6081 + # -- The gateway mode (shared or local), if not given, gateway functionality is disabled + gatewayMode: shared + # -- Optional extra gateway options + gatewayOpts: "" + # -- This allows ovnkube-node to run without SYS_ADMIN capability, by performing interface setup in the CNI plugin + unprivilegedMode: false + # -- The v4 join subnet used for assigning join switch IPv4 addresses + v4JoinSubnet: "100.64.0.0/16" + # -- The v4 masquerade subnet used for assigning masquerade IPv4 addresses + v4MasqueradeSubnet: "169.254.0.0/17" + # -- The v4 subnet for transit switches and routers + v4TransitSubnet: "100.88.0.0/16" + # -- The v6 join subnet used for assigning join switch IPv6 addresses + v6JoinSubnet: "fd98::/64" + # -- The v6 masquerade subnet used for assigning masquerade IPv6 addresses + v6MasqueradeSubnet: "fd69::/112" + # -- The v6 subnet for transit switches and routers + v6TransitSubnet: "fd97::/64" + # -- Whether or not enable ovnkube identity webhook + enableOvnKubeIdentity: false + # -- Whether or not to enable hybrid overlay functionality + enableHybridOverlay: "" + # -- A comma separated set of IP subnets and the associated hostsubnetlengths (eg, \"10.128.0.0/14/23,10.0.0.0/14/23\") to use with the extended hybrid network + hybridOverlayNetCidr: "" + # -- Whether or not to use Admin Network Policy CRD feature with ovn-kubernetes + enableAdminNetworkPolicy: false + # -- Configure to use EgressIP CRD feature with ovn-kubernetes + enableEgressIp: false + # -- Configure EgressIP node reachability using gRPC on this TCP port + egressIpHealthCheckPort: 9107 + # -- Configure to use EgressService CRD feature with 
ovn-kubernetes + enableEgressService: false + # -- Configure to use EgressFirewall CRD feature with ovn-kubernetes + enableEgressFirewall: false + # -- Configure to use EgressQoS CRD feature with ovn-kubernetes + enableEgressQos: false + # -- Enables network QoS support from/to pods + enableNetworkQos: false + # -- Enables multicast support between the pods within the same namespace + enableMulticast: "" + # -- Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes + enableMultiNetwork: false + # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes + enableNetworkSegmentation: false + # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes + enablePreconfiguredUDNAddresses: false + # -- Configure to enable IPsec + enableIpsec: false + # -- Use SSL transport to NB/SB db and northd + enableSsl: false + # -- Configure to enable interconnecting multiple zones + # @default -- true + enableInterconnect: true + # -- Configure to use AdminPolicyBasedExternalRoute CRD feature with ovn-kubernetes + enableMultiExternalGateway: false + # -- Configure to use stateless network policy feature with ovn-kubernetes + enableStatelessNetworkPolicy: false + # -- Configure to use service template feature with ovn-kubernetes + enableSvcTemplate: false + # -- Enables metrics related to scaling + enableMetricsScale: "" + # -- Enables monitoring OVN-Kubernetes master and OVN configuration duration + enableConfigDuration: "" + # -- Indicates if ovn-controller should enable/disable the logical flow in-memory cache when processing Southbound database logical flow changes + # @default -- true + enableLFlowCache: true + # -- Maximum number of logical flow cache entries ovn-controller may create when the logical flow cache is enabled + # @default -- unlimited + lFlowCacheLimit: "" + # -- Maximum size of the logical flow cache (in KB) ovn-controller may create when the logical flow cache is enabled + lFlowCacheLimitKb: "" + # -- Configure to use the IPAMClaims CRD feature with ovn-kubernetes, thus granting persistent IPs across restarts / migration for KubeVirt VMs + enablePersistentIPs: false + # -- Configure to use DNSNameResolver feature with ovn-kubernetes + enableDNSNameResolver: false + # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws + disableSnatMultipleGws: "" + # -- Controls if forwarding is allowed on OVNK controlled interfaces + # @default -- false + disableForwarding: "" + # -- Disables adding openflow flows to check packets too large to be delivered to OVN due to pod MTU being lower than NIC MTU + disablePacketMtuCheck: "" + # -- The largest number of messages per second that gets logged before drop + # @default 20 + aclLoggingRateLimit: 20 + # -- If set, then load balancers do not get deleted when all backends are removed + emptyLbEvents: "" + # -- Port of north bound ovsdb + nbPort: 6641 + # -- Port of south bound ovsdb + sbPort: 6642 + # -- A comma separated set of NetFlow collectors to export flow data + netFlowTargets: "" + # -- A comma separated set of SFlow collectors to export flow data + sflowTargets: "" + # -- A comma separated set of IPFIX collectors to export flow data + ipfixTargets: "" + # -- Rate at which packets should be sampled and sent to each target collector + # @default 400 + ipfixSampling: "" + # -- Maximum number of IPFIX flow records that can be cached at a time + # @default 0, meaning disabled + ipfixCacheMaxFlows: "" 
+ # -- Maximum period in seconds for which an IPFIX flow record is cached and aggregated before being sent + # @default 60 + ipfixCacheActiveTimeout: "" + # -- OVN remote probe interval in ms + # @default 100000 + remoteProbeInterval: 100000 + # -- Enable monitoring all data from SB DB instead of conditionally monitoring the data relevant to this node only + # @default true + monitorAll: true + # -- ovn-controller wait time in ms before clearing OpenFlow rules during start up + # @default 0 + ofctrlWaitBeforeClear: "0" + # -- Container images + # -- Image for DPUs + dpuImage: + # -- Image repository for ovn-kubernetes components + repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu + # -- Specify image tag to run + tag: master + # -- Image pull policy + pullPolicy: IfNotPresent + # -- The name of secret used for pulling image. Use only if needed + imagePullSecretName: "" + # -- Endpoint of DPU Host cluster's Kubernetes api server + dpuHostClusterK8sAPIServer: https://172.25.0.2:6443 + # -- DPU Host cluster's Kubernetes Access Token + dpuHostClusterK8sToken: "" + # -- DPU Host cluster's Kubernetes Access Certs Data + dpuHostClusterK8sCACertData: "" + # -- DPU Host cluster's Kubernetes Access Token File + dpuHostClusterK8sTokenFile: "" + # -- DPU Host cluster's Kubernetes Access Certs File + dpuHostClusterK8sCACert: "" + # -- DPU Host cluster's Network CIDR + dpuHostClusterNetworkCIDR: 10.244.0.0/16/24 + # -- DPU Host cluster's Service CIDR + dpuHostClusterServiceCIDR: 10.96.0.0/16 + # -- MTU of network interface in a Kubernetes pod + mtu: 1400 + +# -- prometheus monitoring related fields +monitoring: + # -- specify the labels for serviceMonitors to be selected for target discovery. + # Prometheus operator defines what namespaces and what servicemonitors within these + # namespaces must be selected for target discovery. The fields defined below helps + # in defining that. + commonServiceMonitorSelectorLabels: + release: kube-prometheus-stack diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index 6356006c8c..5b2dcf6976 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -9,6 +9,7 @@ tags: ovnkube-master: false ovnkube-node: false ovnkube-node-dpu: false + ovnkube-single-node-zone-dpu: false ovnkube-node-dpu-host: false ovnkube-zone-controller: false @@ -76,6 +77,16 @@ global: enableMultiNetwork: false # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes enableNetworkSegmentation: false + # -- Configure to use route advertisements feature with ovn-kubernetes + enableRouteAdvertisements: false + # -- Configure to use EVPN feature with ovn-kubernetes + enableEVPN: false + # -- Advertise default network on all nodes with a default RouteAdvertisements configuration + advertiseDefaultNetwork: false + # -- Pod network isolation between advertised UDN networks. 
(strict or loose) + advertisedUDNIsolationMode: "strict" + # -- Configure to enable no-overlay mode for the default network + enableNoOverlay: false # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes enablePreconfiguredUDNAddresses: false # -- Configure to enable IPsec @@ -107,6 +118,8 @@ global: enablePersistentIPs: true # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces @@ -169,4 +182,3 @@ monitoring: enableServiceMonitor: false # -- deploy PrometheusRules for specific metric collection using the Prometheus Operator enablePrometheusRule: false - diff --git a/mkdocs.yml b/mkdocs.yml index 3d09e08387..528753cce6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,10 +82,11 @@ nav: - Host To NodePort Hairpin: design/host-to-node-port-hairpin-trafficflow.md - ExternalIPs/LoadBalancerIngress: design/external-ip-and-loadbalancer-ingress.md - Internal Subnets: design/ovn-kubernetes-subnets.md - - Kubevirt VM Live Migration: design/live-migration.md + - Kubevirt VM Live Migration: features/live-migration.md - Getting Started: - Launching OVN-Kubernetes: installation/launching-ovn-kubernetes-on-kind.md - Launching OVN-Kubernetes Using Helm: installation/launching-ovn-kubernetes-with-helm.md + - Launching OVN-Kubernetes with DPU Acceleration: installation/launching-ovn-kubernetes-with-dpu.md - Configuration Guide: getting-started/configuration.md - CLI Guide: getting-started/cli-guide.md - Deploying Workloads on OVN-Kubernetes cluster: getting-started/example-pod-creation.md diff --git a/openshift/test/generated/zz_generated.annotations.go b/openshift/test/generated/zz_generated.annotations.go index 3ec39fa006..1cd08beb94 100644 --- a/openshift/test/generated/zz_generated.annotations.go +++ b/openshift/test/generated/zz_generated.annotations.go @@ -1141,12 +1141,16 @@ var AppendedAnnotations = map[string]string{ "Multicast when multicast enabled for namespace should be able to send multicast UDP traffic between nodes": "[Disabled:Unimplemented]", + "Network Policy: ICMP bypass allows ICMP between pods with default deny policy on the default network": "[Disabled:Unimplemented]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller pod connected to ClusterUserDefinedNetwork CR & managed NADs cannot be deleted when being used": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD according to spec in each target namespace and report active namespaces": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD in new created namespaces that apply to namespace-selector": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller should delete NAD when target namespace is terminating": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller when CR is deleted, should delete all managed NAD in each target namespace": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller when namespace-selector is 
mutated should create NAD in namespaces that apply to mutated namespace-selector": "[Suite:openshift/conformance/parallel]", @@ -1337,6 +1341,10 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Localnet using ClusterUserDefinedNetwork CR, pods in different namespaces, should communicate over localnet topology": "[Disabled:Unimplemented]", + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L2 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L3 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: Network Policies on a user defined primary network allow ingress traffic to one pod from a particular namespace in L2 primary UDN": "[Disabled:Unimplemented]", "Network Segmentation: Network Policies on a user defined primary network allow ingress traffic to one pod from a particular namespace in L3 primary UDN": "[Disabled:Unimplemented]", @@ -1365,6 +1373,8 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv6 reserved subnets": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: integration should recover ovnkube pods after restart with primary and secondary UDN resources": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN with custom network, cluster-networked pods, NodePort service": "[Suite:openshift/conformance/parallel]", "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN, cluster-networked pods, NodePort service": "[Disabled:Unimplemented]", @@ -1421,6 +1431,8 @@ var AppendedAnnotations = map[string]string{ "Services does not use host masquerade address as source IP address when communicating externally": "[Disabled:Unimplemented]", + "Services of type NodePort should be able to preserve UDP traffic when server pod cycles for a NodePort service via a different node": "[Disabled:Unimplemented]", + "Services of type NodePort should handle IP fragments": "[Disabled:Unimplemented]", "Services of type NodePort should listen on each host addresses": "[Disabled:Unimplemented]", @@ -1551,6 +1563,10 @@ var AppendedAnnotations = map[string]string{ "e2e delete databases recovering from deleting db files while maintaining connectivity when deleting both db files on ovnkube-db-2": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Cluster Default Network Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1591,6 +1607,10 @@ var AppendedAnnotations = 
map[string]string{ "e2e egress IP validation Cluster Default Network of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1631,6 +1651,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1671,6 +1695,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L3 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1711,6 +1739,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv6 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should handle EIP reassignment correctly on namespace and 
pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", diff --git a/test/e2e/acl_logging.go b/test/e2e/acl_logging.go index c5c129769b..f07a81e3c4 100644 --- a/test/e2e/acl_logging.go +++ b/test/e2e/acl_logging.go @@ -190,18 +190,20 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol nsNames [4]string ) BeforeEach(func() { + nsNames[0] = fr.Namespace.Name + suffix := framework.RandomSuffix() + nsNames[1] = fmt.Sprintf("anp-peer-restricted-%s", suffix) + nsNames[2] = fmt.Sprintf("anp-peer-open-%s", suffix) + nsNames[3] = fmt.Sprintf("anp-peer-unknown-%s", suffix) + By("creating an admin network policy") - err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name) + err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name, nsNames[1], nsNames[2], nsNames[3]) Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the ANP") Expect(setANPACLLogSeverity(anpName, initialDenyACLSeverity, initialAllowACLSeverity, initialPassACLSeverity)).To(Succeed()) By("creating peer namespaces that are selected by the admin network policy") - nsNames[0] = fr.Namespace.Name - nsNames[1] = "anp-peer-restricted" - nsNames[2] = "anp-peer-open" - nsNames[3] = "anp-peer-unknown" for _, ns := range nsNames[1:] { _, err = e2ekubectl.RunKubectl("default", "create", "ns", ns) Expect(err).NotTo(HaveOccurred()) @@ -309,7 +311,7 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol }, maxPokeRetries*pokeInterval, pokeInterval).Should(BeTrue()) By("creating a baseline admin network policy") - err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name) + err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name, nsNames[1], nsNames[3]) Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the BANP") @@ -956,7 +958,7 @@ func makeDenyAllPolicy(f *framework.Framework, ns string, policyName string) (*k return f.ClientSet.NetworkingV1().NetworkPolicies(ns).Create(context.TODO(), policy, metav1.CreateOptions{}) } -func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS string) error { +func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS string) error { anpYaml := "anp.yaml" var anpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: AdminNetworkPolicy @@ -974,20 +976,20 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-open" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-open + kubernetes.io/metadata.name: %s - name: "pass-to-unknown" action: "Pass" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, anpName, priority, anpSubjectNS) + kubernetes.io/metadata.name: %s +`, anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS) if err := os.WriteFile(anpYaml, []byte(anpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) @@ -1003,7 +1005,7 @@ spec: return err } -func makeBaselineAdminNetworkPolicy(banpSubjectNS string) error { +func makeBaselineAdminNetworkPolicy(banpSubjectNS, restrictedPeerNS, unknownPeerNS string) error { banpYaml := "banp.yaml" var banpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: 
BaselineAdminNetworkPolicy @@ -1020,14 +1022,14 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-unknown" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, banpSubjectNS) + kubernetes.io/metadata.name: %s +`, banpSubjectNS, restrictedPeerNS, unknownPeerNS) if err := os.WriteFile(banpYaml, []byte(banpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 87323cdb2b..be0313b305 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -1066,9 +1066,11 @@ var _ = ginkgo.Describe("test e2e pod connectivity to host addresses", func() { framework.Failf("Test requires >= 1 Ready nodes, but there are only %v nodes", len(nodes.Items)) } workerNodeName = nodes.Items[0].Name - // Add another IP address to the worker + // Add another IP address to the worker with preferred_lft 0 to mark it as deprecated. + // This prevents the IP from being selected as the node's primary gateway IP while still + // allowing the test to verify pod-to-host connectivity to non-node IPs. _, err = infraprovider.Get().ExecK8NodeCommand(workerNodeName, []string{"ip", "a", "add", - fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName()}) + fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName(), "preferred_lft", "0"}) framework.ExpectNoError(err, "failed to add IP to %s", workerNodeName) }) @@ -1946,6 +1948,20 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { return fmt.Sprintf(collectorContainerTemplate, port) } + getCollectorArgs := func(protocol flowMonitoringProtocol, port uint16) []string { + args := []string{"-kafka=false"} + switch protocol { + case sflow: + // Disable other collectors to avoid non-deterministic startup ordering in logs. 
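+			// goflow listener flags (mirrored in every case below): -nf drives the
+			// NetFlow v9/IPFIX listener, -nfl the legacy NetFlow v5 listener and
+			// -sflow the sFlow listener; each case enables exactly one of them and
+			// points its *.port flag at the test collector's port.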
+ args = append(args, "-nf=false", "-nfl=false", "-sflow=true", fmt.Sprintf("-sflow.port=%d", port)) + case netflow_v5: + args = append(args, "-nf=false", "-sflow=false", "-nfl=true", fmt.Sprintf("-nfl.port=%d", port)) + case ipfix: + args = append(args, "-nfl=false", "-sflow=false", "-nf=true", fmt.Sprintf("-nf.port=%d", port)) + } + return args + } + keywordInLogs := map[flowMonitoringProtocol]string{ netflow_v5: "NETFLOW_V5", ipfix: "IPFIX", sflow: "SFLOW_5"} @@ -1966,7 +1982,7 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() framework.ExpectNoError(err, "failed to get primary network") collectorExternalContainer := infraapi.ExternalContainer{Name: getContainerName(collectorPort), Image: "cloudflare/goflow", - Network: primaryProviderNetwork, CmdArgs: []string{"-kafka=false"}, ExtPort: collectorPort} + Network: primaryProviderNetwork, CmdArgs: getCollectorArgs(protocol, collectorPort), ExtPort: collectorPort} collectorExternalContainer, err = providerCtx.CreateExternalContainer(collectorExternalContainer) if err != nil { framework.Failf("failed to start flow collector container %s: %v", getContainerName(collectorPort), err) @@ -1984,6 +2000,58 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { setEnv := map[string]string{ovnEnvVar: addressAndPort} setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app=ovnkube-node", + }) + if err != nil { + framework.Failf("could not get ovnkube-node pods: %v", err) + } + + if protocol == sflow { + ginkgo.By("Waiting for ovnkube-node to configure br-int sflow and setting sampling/polling for better signal") + for _, ovnKubeNodePod := range ovnKubeNodePods.Items { + var sFlowUUID string + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + getSFlowExecOptions := e2epod.ExecOptions{ + Command: []string{"ovs-vsctl", "--if-exists", "get", "bridge", "br-int", "sflow"}, + Namespace: ovnKubeNamespace, + PodName: ovnKubeNodePod.Name, + ContainerName: getNodeContainerName(), + CaptureStdout: true, + CaptureStderr: true, + } + rawUUID, stderr, execErr := e2epod.ExecWithOptions(f, getSFlowExecOptions) + if execErr != nil { + framework.Logf("waiting for sflow row on %s: query failed: %v, stderr: %s", + ovnKubeNodePod.Name, execErr, stderr) + return false, nil + } + rawUUID = strings.TrimSpace(strings.Trim(rawUUID, "\"")) + if rawUUID == "" || rawUUID == "[]" { + framework.Logf("waiting for sflow row on %s: br-int has no sflow row yet", ovnKubeNodePod.Name) + return false, nil + } + sFlowUUID = rawUUID + return true, nil + }) + framework.ExpectNoError(err, "timed out waiting for br-int sflow row on %s", ovnKubeNodePod.Name) + + setSFlowExecOptions := e2epod.ExecOptions{ + Command: []string{"ovs-vsctl", "--if-exists", "set", "sflow", sFlowUUID, "sampling=1", "polling=1"}, + Namespace: ovnKubeNamespace, + PodName: ovnKubeNodePod.Name, + ContainerName: getNodeContainerName(), + CaptureStdout: true, + CaptureStderr: true, + } + _, setStderr, setErr := e2epod.ExecWithOptions(f, setSFlowExecOptions) + if setErr != nil { + framework.Logf("skipping sflow sampling tuning on %s: failed to set sampling/polling for row %s: %v, stderr: %s", + ovnKubeNodePod.Name, sFlowUUID, setErr, setStderr) + } + } + } + 
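+	// Test-only tuning: in OVS sFlow configuration, "sampling" is 1-in-N
+	// packets and "polling" is the counter-poll interval in seconds, so
+	// sampling=1 with polling=1 maximizes the flow records the collector
+	// can log before the keyword check below times out.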
ginkgo.By(fmt.Sprintf("Checking that the collector container received %s data", protocolStr)) keyword := keywordInLogs[protocol] collectorContainerLogsTest := func() wait.ConditionFunc { @@ -1995,14 +2063,14 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { } collectorContainerLogs = strings.TrimSuffix(collectorContainerLogs, "\n") logLines := strings.Split(collectorContainerLogs, "\n") - lastLine := logLines[len(logLines)-1] // check that flow monitoring traffic has been logged - if strings.Contains(lastLine, keyword) { - framework.Logf("Successfully found string %s in last log line of"+ - " the collector: %s", keyword, lastLine) - return true, nil + for _, line := range logLines { + if strings.Contains(line, keyword) { + framework.Logf("Successfully found string %s in collector logs line: %s", keyword, line) + return true, nil + } } - framework.Logf("%s not found in last log line: %s", keyword, lastLine) + framework.Logf("%s not found in collector logs", keyword) return false, nil } } @@ -2014,7 +2082,7 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { ginkgo.By(fmt.Sprintf("Unsetting %s variable in ovnkube-node daemonset", ovnEnvVar)) setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), nil, ovnEnvVar) - ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{ + ovnKubeNodePods, err = f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app=ovnkube-node", }) if err != nil { @@ -2032,9 +2100,9 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { CaptureStderr: true, } - targets, stderr, _ := e2epod.ExecWithOptions(f, execOptions) + targets, stderr, execErr := e2epod.ExecWithOptions(f, execOptions) framework.Logf("execOptions are %v", execOptions) - if err != nil { + if execErr != nil { framework.Failf("could not lookup ovs %s targets: %v", protocolStr, stderr) } gomega.Expect(targets).To(gomega.BeEmpty()) diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index dc68b02a98..b795e73bef 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -36,6 +36,7 @@ import ( e2enode "k8s.io/kubernetes/test/e2e/framework/node" "k8s.io/kubernetes/test/e2e/framework/pod" e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" utilnet "k8s.io/utils/net" ) @@ -2159,35 +2160,24 @@ spec: providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() framework.ExpectNoError(err, "failed to get providers primary network") externalContainerPrimary := infraapi.ExternalContainer{Name: "external-container-for-egressip-mtu-test", Image: images.AgnHost(), - Network: providerPrimaryNetwork, CmdArgs: []string{"pause"}, ExtPort: externalContainerPrimaryPort} + Network: providerPrimaryNetwork, RuntimeArgs: []string{"--sysctl", "net.ipv4.ip_no_pmtu_disc=2"}, + CmdArgs: []string{"netexec", httpPort, udpPort}, ExtPort: externalContainerPrimaryPort} externalContainerPrimary, err = providerCtx.CreateExternalContainer(externalContainerPrimary) framework.ExpectNoError(err, "failed to create external container: %s", externalContainerPrimary.String()) - // First disable PMTUD - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=2"}) - framework.ExpectNoError(err, "disabling PMTUD in the external kind container 
failed: %v", err) - providerCtx.AddCleanUpFn(func() error { - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=0"}) - return err - }) - - go func() { - _, _ = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"/agnhost", "netexec", httpPort, udpPort}) - }() - ginkgo.By("Checking connectivity to the external kind container and verify that the source IP is the egress IP") var curlErr error - _ = wait.PollUntilContextTimeout( + err = wait.PollUntilContextTimeout( context.Background(), retryInterval, retryTimeout, true, func(ctx context.Context) (bool, error) { - curlErr := curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) + curlErr = curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) return curlErr == nil, nil }, ) - framework.ExpectNoError(curlErr, "connectivity check to the external kind container failed: %v", curlErr) + framework.ExpectNoError(err, "connectivity check to the external kind container failed: %v", curlErr) // We will ask the server to reply with a UDP packet bigger than the pod // network MTU. Since PMTUD has been disabled on the server, the reply @@ -3443,6 +3433,130 @@ spec: } }) + ginkgo.It("Should fail if egressip-mark annotation is present during EgressIP creation", func() { + // This check can be removed when https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5879 is addressed + if isHelmEnabled() { + e2eskipper.Skipf("Skipping this test for HELM environment as we dont create required Validatingadmissionpolicy in a HELM environment") + } + + ginkgo.By("1. Create an EgressIP object with one egress IP defined") + var egressIP1 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") + + var egressIPConfig = `apiVersion: k8s.ovn.org/v1 +kind: EgressIP +metadata: + name: ` + egressIPName + ` + annotations: + ` + util.EgressIPMarkAnnotation + `: "50000" +spec: + egressIPs: + - ` + egressIP1.String() + ` + namespaceSelector: + matchLabels: + name: ` + f.Namespace.Name + ` +` + if err := os.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressIPYaml); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + + ginkgo.By("2. Create an EgressIP with k8s.ovn.org/egressip-mark annotation defined") + _, err = e2ekubectl.RunKubectl("default", "create", "-f", egressIPYaml) + gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark annotation is present during creation") + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("EgressIP resources cannot be created with the \"k8s.ovn.org/egressip-mark\" annotation. 
This annotation is managed by the system."))) + }) + + ginkgo.It("Should fail if egressip-mark annotation is being added by a regular user", func() { + // This check can be removed when https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5879 is addressed + if isHelmEnabled() { + e2eskipper.Skipf("Skipping this test for HELM environment as we dont create required Validatingadmissionpolicy in a HELM environment") + } + + ginkgo.By("1. Add the \"k8s.ovn.org/egress-assignable\" label to egress1Node node") + egressNodeAvailabilityHandler := egressNodeAvailabilityHandlerViaLabel{f} + egressNodeAvailabilityHandler.Enable(egress1Node.name) + defer egressNodeAvailabilityHandler.Restore(egress1Node.name) + + podNamespace := f.Namespace + labels := map[string]string{ + "name": f.Namespace.Name, + } + updateNamespaceLabels(f, podNamespace, labels) + + ginkgo.By("2. Create an EgressIP object with one egress IP defined") + var egressIP1 net.IP + var err error + if utilnet.IsIPv6String(egress1Node.nodeIP) { + egressIP1, err = ipalloc.NewPrimaryIPv6() + } else { + egressIP1, err = ipalloc.NewPrimaryIPv4() + } + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP") + + var egressIPConfig = `apiVersion: k8s.ovn.org/v1 +kind: EgressIP +metadata: + name: ` + egressIPName + ` +spec: + egressIPs: + - ` + egressIP1.String() + ` + namespaceSelector: + matchLabels: + name: ` + f.Namespace.Name + ` +` + if err := os.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil { + framework.Failf("Unable to write CRD config to disk: %v", err) + } + defer func() { + if err := os.Remove(egressIPYaml); err != nil { + framework.Logf("Unable to remove the CRD config from disk: %v", err) + } + }() + + framework.Logf("Create the EgressIP configuration") + e2ekubectl.RunKubectlOrDie("default", "create", "-f", egressIPYaml) + + ginkgo.By("3. Check that the status is of length one and that it is assigned to egress1Node") + statuses := verifyEgressIPStatusLengthEquals(1, nil) + if statuses[0].Node != egress1Node.name { + framework.Failf("Step 3. Check that the status is of length one and that it is assigned to egress1Node, failed") + } + + ginkgo.By("4. Try updating k8s.ovn.org/egressip-mark annotation") + // Get the current annotation value to ensure we try to overwrite with a different value + annotationsJSON, err := e2ekubectl.RunKubectl("", "get", "egressip", egressIPName, "-o", "jsonpath={.metadata.annotations}") + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to get annotations") + var annotations map[string]string + err = json.Unmarshal([]byte(annotationsJSON), &annotations) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to unmarshal annotations JSON") + currentValue := annotations[util.EgressIPMarkAnnotation] + + newValue := 50000 + if currentValue == "50000" { + newValue = 50001 + } + + _, err = e2ekubectl.RunKubectl("", "annotate", "--overwrite", "egressip", egressIPName, fmt.Sprintf("%s=%d", util.EgressIPMarkAnnotation, newValue)) + gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark is being updated") + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("The \"k8s.ovn.org/egressip-mark\" annotation cannot be modified or removed once set. This annotation is managed by the system."))) + + ginkgo.By("5. 
Try removing k8s.ovn.org/egressip-mark annotation") + _, err = e2ekubectl.RunKubectl("", "annotate", "--overwrite", "egressip", egressIPName, fmt.Sprintf("%s-", util.EgressIPMarkAnnotation)) + gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark is being removed") + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("The \"k8s.ovn.org/egressip-mark\" annotation cannot be modified or removed once set. This annotation is managed by the system."))) + }) + ginkgo.DescribeTable("[OVN network] multiple namespaces with different primary networks", func(otherNetworkAttachParms networkAttachmentConfigParams) { if !isNetworkSegmentationEnabled() { ginkgo.Skip("network segmentation is disabled") diff --git a/test/e2e/infraprovider/api/api.go b/test/e2e/infraprovider/api/api.go index 2a38ef6595..99030444c2 100644 --- a/test/e2e/infraprovider/api/api.go +++ b/test/e2e/infraprovider/api/api.go @@ -18,6 +18,8 @@ type Provider interface { // PrimaryNetwork returns OVN-Kubernetes primary infrastructure network information PrimaryNetwork() (Network, error) + // ListNetworks returns the names of all networks + ListNetworks() ([]string, error) // GetNetwork returns a network GetNetwork(name string) (Network, error) // GetExternalContainerNetworkInterface fetches network interface information from the external container attached to a specific network diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 58a25d9774..31c28a04c5 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -68,6 +68,10 @@ func (k *kind) GetNetwork(name string) (api.Network, error) { return getNetwork(name) } +func (k *kind) ListNetworks() ([]string, error) { + return listNetworks() +} + func (k *kind) GetExternalContainerNetworkInterface(container api.ExternalContainer, network api.Network) (api.NetworkInterface, error) { return getNetworkInterface(container.Name, network.Name()) } @@ -607,6 +611,8 @@ const ( inspectNetworkMACKeyStr = "{{ with index .NetworkSettings.Networks %q }}{{ .MacAddress }}{{ end }}" inspectNetworkContainersKeyStr = "{{ range $key, $value := .Containers }}{{ printf \"%s\\n\" $value.Name}}{{ end }}'" emptyValue = "" + // Docker 29+ returns "invalid IP" for IP fields + emptyIPValue = "invalid IP" ) func isNetworkAttachedToContainer(networkName, containerName string) bool { @@ -627,13 +633,27 @@ func doesContainerNameExist(name string) (bool, error) { return state != "", nil } +func listNetworks() ([]string, error) { + output, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to list networks: %w", err) + } + var networks []string + for _, name := range strings.Split(strings.TrimSpace(string(output)), "\n") { + if name != "" { + networks = append(networks, name) + } + } + return networks, nil +} + func doesNetworkExist(networkName string) (bool, error) { - dataBytes, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + networks, err := listNetworks() if err != nil { - return false, fmt.Errorf("failed to list networks: %w", err) + return false, err } - for _, existingNetworkName := range strings.Split(strings.Trim(string(dataBytes), "\n"), "\n") { - if existingNetworkName == networkName { + for _, name := range networks { + if name == networkName { return true, nil } } @@ -715,57 +735,51 
@@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac } valueStr := strings.Trim(string(value), "\n") valueStr = strings.Trim(valueStr, "'") - if valueStr == emptyValue { + if valueStr == emptyValue || valueStr == emptyIPValue { return "", nil } return valueStr, nil } - getIPFamilyFlagForIPRoute2 := func(ipStr string) (string, error) { + getIPFamilyForIPRoute2 := func(ipStr string) (string, error) { ip := net.ParseIP(ipStr) if ip == nil { return "", fmt.Errorf("invalid IP address: %s", ipStr) } if utilnet.IsIPv6(ip) { - return "-6", nil + return "inet6", nil } - return "-4", nil + return "inet", nil } getInterfaceNameUsingIP := func(ip string) (string, error) { - ipFlag, err := getIPFamilyFlagForIPRoute2(ip) + ipFamily, err := getIPFamilyForIPRoute2(ip) if err != nil { - return "", fmt.Errorf("failed to get IP family flag for %s: %w", ip, err) + return "", fmt.Errorf("failed to get IP family for %s: %w", ip, err) } - allInfAddrBytes, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "-br", ipFlag, "a", "sh").CombinedOutput() + cmdArgs := []string{"exec", "-i", containerName, "ip", "-o", "-f", ipFamily, "addr", "show"} + allInfAddrBytes, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() if err != nil { - return "", fmt.Errorf("failed to find interface with IP %s on container %s with command 'ip -br a sh': err %v, out: %s", ip, containerName, - err, allInfAddrBytes) + return "", fmt.Errorf("failed to find interface with IP %s on container %s with command %q: err %v, out: %s", ip, containerName, + strings.Join(cmdArgs[3:], " "), err, allInfAddrBytes) } - var ipLine string + var infName string for _, line := range strings.Split(string(allInfAddrBytes), "\n") { if strings.Contains(line, ip) { - ipLine = line + fields := strings.Fields(line) + if len(fields) < 2 { + return "", fmt.Errorf("failed to parse 'ip addr' output line %q", line) + } + infName = strings.TrimSuffix(fields[1], ":") + if strings.Contains(infName, "@") { + infName = strings.SplitN(infName, "@", 2)[0] + } break } } - if ipLine == "" { + if infName == "" { return "", fmt.Errorf("failed to find IP %q within 'ip a' command on container %q:\n\n%q", ip, containerName, string(allInfAddrBytes)) } - ipLineSplit := strings.Split(ipLine, " ") - if len(ipLine) == 0 { - return "", fmt.Errorf("failed to find interface name from 'ip a' output line %q", ipLine) - } - infNames := ipLineSplit[0] - splitChar := " " - if strings.Contains(infNames, "@") { - splitChar = "@" - } - infNamesSplit := strings.Split(infNames, splitChar) - if len(infNamesSplit) == 0 { - return "", fmt.Errorf("failed to extract inf name + veth name from %q splitting by %q", infNames, splitChar) - } - infName := infNamesSplit[0] // validate its an interface name on the Node with iproute2 out, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "link", "show", infName).CombinedOutput() if err != nil { @@ -805,7 +819,7 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac if ni.IPv6 != "" { ni.InfName, err = getInterfaceNameUsingIP(ni.IPv6) if err != nil { - framework.Logf("failed to get network interface name using IPv4 address %s: %v", ni.IPv6, err) + framework.Logf("failed to get network interface name using IPv6 address %s: %v", ni.IPv6, err) } } ni.IPv6Prefix, err = getContainerNetwork(inspectNetworkIPv6PrefixKeyStr) diff --git a/test/e2e/network_policy_icmp.go b/test/e2e/network_policy_icmp.go new file mode 
100644 index 0000000000..510394a2fe --- /dev/null +++ b/test/e2e/network_policy_icmp.go @@ -0,0 +1,71 @@ +package e2e + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = ginkgo.Describe("Network Policy: ICMP bypass", feature.NetworkPolicy, func() { + f := wrappedTestFramework("network-policy-icmp") + + ginkgo.BeforeEach(func() { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("Allow ICMP bypass with NetworkPolicy is not enabled, skipping ICMP bypass network policy tests") + } + }) + + ginkgo.It("allows ICMP between pods with default deny policy on the default network", func() { + namespace := f.Namespace.Name + + ginkgo.By("creating a \"default deny\" network policy") + _, err := makeDenyAllPolicy(f, namespace, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating server and client pods") + serverPodName := "icmp-server" + clientPodName := "icmp-client" + serverCmd := []string{"/bin/bash", "-c", "/agnhost netexec --http-port 8000"} + clientCmd := []string{"/agnhost", "pause"} + + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err, "failed to get ready schedulable nodes") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverNode := nodes.Items[0].Name + clientNode := nodes.Items[1].Name + + serverPod, err := createGenericPod(f, serverPodName, serverNode, namespace, serverCmd) + framework.ExpectNoError(err, "failed to create server pod") + _, err = createGenericPod(f, clientPodName, clientNode, namespace, clientCmd) + framework.ExpectNoError(err, "failed to create client pod") + + clientConfig := podConfiguration{name: clientPodName, namespace: namespace} + serverConfig := podConfiguration{name: serverPodName, namespace: namespace} + + ginkgo.By("verifying TCP is denied by the default deny policy") + gomega.Eventually(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + + ginkgo.By("verifying ICMP is allowed between pods") + serverIPs, err := podIPsFromStatus(f.ClientSet, namespace, serverPodName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(f.ClientSet, serverConfig, clientConfig, serverIP) + }, 1*time.Minute, 6*time.Second).Should(gomega.Succeed()) + } + }) +}) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 0aefc2236c..2760d5424e 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -1317,6 +1317,40 @@ spec: } }) + It("should delete NAD when target namespace is terminating", func() { + testTerminatingNs := f.Namespace.Name + "terminating" + + By("add new target namespace to CR namespace-selector") + patch := fmt.Sprintf(`[{"op": "add", "path": "/spec/namespaceSelector/matchExpressions/0/values/-", "value": "%s"}]`, testTerminatingNs) + _, err := e2ekubectl.RunKubectl("", "patch", clusterUserDefinedNetworkResource, testClusterUdnName, "--type=json", "-p="+patch) + Expect(err).NotTo(HaveOccurred()) + + By("create the target namespace") + _, err = 
cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testTerminatingNs, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verify NAD is created in the namespace") + Eventually(func() error { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err + }, time.Second*15, time.Second*1).Should(Succeed(), "NAD should be created in target namespace") + + By("delete the namespace to trigger termination") + err = cs.CoreV1().Namespaces().Delete(context.Background(), testTerminatingNs, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verify NAD is deleted from the terminating namespace") + Eventually(func() bool { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err != nil && kerrors.IsNotFound(err) + }, time.Second*30, time.Second*1).Should(BeTrue(), + "NAD should be deleted when namespace is terminating") + }) + It("should create NAD in new created namespaces that apply to namespace-selector", func() { testNewNs := f.Namespace.Name + "green" diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go index 4e42658588..fce9005126 100644 --- a/test/e2e/network_segmentation_default_network_annotation.go +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -177,11 +177,6 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat Expect(err).NotTo(HaveOccurred(), "Should create UserDefinedNetwork") Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udn.Namespace, udn.Name), 5*time.Second, time.Second).Should(Succeed()) - By("Creating a pod without the default-network annotation") - podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) - podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} - podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) - By("Creating a pod with the default-network annotation") nse := []nadapi.NetworkSelectionElement{{ @@ -200,6 +195,11 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat podWithAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} podWithAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithAnnotation) + By("Creating a pod without the default-network annotation") + podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) + podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} + podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) + By("Attempting to add the default-network annotation to the pod without annotation") podWithoutAnnotation.Annotations = map[string]string{ "v1.multus-cni.io/default-network": string(marshalledNSE), diff --git a/test/e2e/network_segmentation_integration.go b/test/e2e/network_segmentation_integration.go new file mode 100644 index 0000000000..f2563980bb --- /dev/null +++ b/test/e2e/network_segmentation_integration.go @@ -0,0 +1,304 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + mnpapi 
"github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + mnpclient "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1beta1" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = Describe("Network Segmentation: integration", feature.NetworkSegmentation, func() { + f := wrappedTestFramework("network-segmentation-integration") + f.SkipNamespaceCreation = true + + var cs clientset.Interface + + BeforeEach(func() { + cs = f.ClientSet + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) + }) + + It("should recover ovnkube pods after restart with primary and secondary UDN resources", func() { + const ( + primaryUDNName = "primary-udn" + secondaryUDNName = "secondary-udn" + egressIPName = "udn-egressip" + udnPodName = "udn-egress-pod" + udnServiceName = "udn-service" + serviceTargetPort = 80 + nodeHostnameKey = "kubernetes.io/hostname" + egressPodLabelKey = "udn-egress-pod" + egressPodLabelVal = "enabled" + egressNSLabelKey = "udn-egress-namespace" + egressNSLabelValue = "enabled" + ) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("", "delete", "eip", egressIPName, "--ignore-not-found=true") + }) + + primaryNamespace := f.Namespace.Name + + By("creating a primary UDN and waiting until it is ready") + cleanupPrimaryUDN, err := createManifest(primaryNamespace, newPrimaryUserDefinedNetworkManifest(cs, primaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupPrimaryUDN() + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, primaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating a secondary UDN and waiting until it is ready") + cleanupSecondaryUDN, err := createManifest(primaryNamespace, newL2SecondaryUDNManifest(secondaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupSecondaryUDN() + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, secondaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("labeling the primary namespace so it matches the EgressIP namespace selector") + primaryNSObj, err := cs.CoreV1().Namespaces().Get(context.Background(), primaryNamespace, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + if primaryNSObj.Labels == nil { + primaryNSObj.Labels = map[string]string{} + } + primaryNSObj.Labels[egressNSLabelKey] = egressNSLabelValue + _, err = cs.CoreV1().Namespaces().Update(context.Background(), primaryNSObj, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("selecting one schedulable node for both pod placement and EgressIP assignment") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 1) + Expect(err).NotTo(HaveOccurred()) + 
Expect(nodes.Items).NotTo(BeEmpty()) + targetNode := nodes.Items[0].Name + + By(fmt.Sprintf("labeling node %s as egress assignable", targetNode)) + labelNodeForEgress(f, targetNode) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("default", "label", "node", targetNode, "k8s.ovn.org/egress-assignable-") + }) + + By("creating an EgressIP object selected by the primary UDN namespace and pod label") + var egressIP string + if isIPv4Supported(cs) { + egressIPv4, allocErr := ipalloc.NewPrimaryIPv4() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv4.String() + } else { + egressIPv6, allocErr := ipalloc.NewPrimaryIPv6() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv6.String() + } + cleanupEIP, err := createManifest("", createEIPManifest( + egressIPName, + map[string]string{egressPodLabelKey: egressPodLabelVal}, + map[string]string{egressNSLabelKey: egressNSLabelValue}, + egressIP, + )) + Expect(err).NotTo(HaveOccurred()) + defer cleanupEIP() + + By("creating a pod, service and network policy in the primary UDN namespace") + udnPodCfg := *podConfig( + udnPodName, + withCommand(func() []string { + return httpServerContainerCmd(serviceTargetPort) + }), + withLabels(map[string]string{egressPodLabelKey: egressPodLabelVal}), + withNodeSelector(map[string]string{nodeHostnameKey: targetNode}), + withNetworkAttachment([]nadapi.NetworkSelectionElement{ + {Name: secondaryUDNName}, + }), + ) + udnPodCfg.namespace = primaryNamespace + udnPod := runUDNPod(cs, primaryNamespace, udnPodCfg, nil) + Expect(udnPod).NotTo(BeNil()) + var secondaryAttachmentStatus []nadapi.NetworkStatus + Eventually(func() ([]nadapi.NetworkStatus, error) { + udnPod, err = cs.CoreV1().Pods(primaryNamespace).Get(context.Background(), udnPod.Name, metav1.GetOptions{}) + if err != nil { + return nil, err + } + secondaryAttachmentStatus, err = podNetworkStatus(udnPod, func(status nadapi.NetworkStatus) bool { + return status.Name == namespacedName(primaryNamespace, secondaryUDNName) + }) + return secondaryAttachmentStatus, err + }, 30*time.Second, time.Second).Should(HaveLen(1)) + + By("ensuring EgressIP is assigned to the same node as the pod") + Expect(waitForEgressIPAssignedNode(egressIPName, targetNode)).To(Succeed()) + + By("creating a multi network policy for the secondary UDN") + mnpCli, err := mnpclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) + const secondaryUDNMNPName = "secondary-udn-default-deny" + secondaryUDNMNP := &mnpapi.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: secondaryUDNMNPName, + Annotations: map[string]string{ + PolicyForAnnotation: secondaryUDNName, + }, + }, + Spec: mnpapi.MultiNetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{egressPodLabelKey: egressPodLabelVal}, + }, + PolicyTypes: []mnpapi.MultiPolicyType{ + mnpapi.PolicyTypeIngress, + mnpapi.PolicyTypeEgress, + }, + }, + } + _, err = mnpCli.MultiNetworkPolicies(primaryNamespace).Create(context.Background(), secondaryUDNMNP, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + _ = mnpCli.MultiNetworkPolicies(primaryNamespace).Delete(context.Background(), secondaryUDNMNPName, metav1.DeleteOptions{}) + }) + + _, err = cs.CoreV1().Services(primaryNamespace).Create(context.Background(), &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: udnServiceName, + }, + Spec: v1.ServiceSpec{ + Selector: map[string]string{egressPodLabelKey: egressPodLabelVal}, + Ports: []v1.ServicePort{ + { + Name: "http", + Port: 
serviceTargetPort, + Protocol: v1.ProtocolTCP, + TargetPort: intstr.FromInt(serviceTargetPort), + }, + }, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + _, err = makeDenyAllPolicy(f, primaryNamespace, "deny-all") + Expect(err).NotTo(HaveOccurred()) + + By("restarting each ovnkube pod and ensuring all pods recover without crash loops") + Expect(restartAllOVNKubePodsAndAssertHealthy(f)).To(Succeed()) + }) +}) + +func restartAllOVNKubePodsAndAssertHealthy(f *framework.Framework) error { + ovnNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + pods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list ovnkube pods in namespace %s: %w", ovnNamespace, err) + } + + restartedPods := 0 + for i := range pods.Items { + pod := pods.Items[i] + if !strings.HasPrefix(pod.Name, "ovnkube-") || pod.Status.Phase != v1.PodRunning { + continue + } + restartedPods++ + framework.Logf("restarting ovnkube pod %s/%s", pod.Namespace, pod.Name) + if err := deletePodWithWait(context.Background(), f.ClientSet, &pod); err != nil { + return fmt.Errorf("failed restarting ovnkube pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + } + if restartedPods == 0 { + return fmt.Errorf("no running ovnkube pods found in namespace %s", ovnNamespace) + } + + if err := waitOVNKubernetesHealthy(f); err != nil { + return fmt.Errorf("ovn-kubernetes did not become healthy after restarting %d pods: %w", restartedPods, err) + } + + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + if err := assertOVNKubePodsReadyAndNotCrashLooping(f.ClientSet, ovnNamespace); err != nil { + framework.Logf("ovnkube pod readiness/crashloop check still failing: %v", err) + return false, nil + } + return true, nil + }) +} + +func assertOVNKubePodsReadyAndNotCrashLooping(cs clientset.Interface, namespace string) error { + pods, err := cs.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed listing ovnkube pods: %w", err) + } + + found := 0 + for _, pod := range pods.Items { + if !strings.HasPrefix(pod.Name, "ovnkube-") { + continue + } + found++ + if pod.Status.Phase != v1.PodRunning { + return fmt.Errorf("pod %s is not running (phase=%s)", pod.Name, pod.Status.Phase) + } + + ready := false + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodReady && condition.Status == v1.ConditionTrue { + ready = true + break + } + } + if !ready { + return fmt.Errorf("pod %s is not ready", pod.Name) + } + + for _, status := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) 
{ + if status.State.Waiting != nil && status.State.Waiting.Reason == "CrashLoopBackOff" { + return fmt.Errorf("pod %s container %s is in CrashLoopBackOff", pod.Name, status.Name) + } + } + } + + if found == 0 { + return fmt.Errorf("no ovnkube pods found in namespace %s", namespace) + } + return nil +} + +func waitForEgressIPAssignedNode(egressIPName, nodeName string) error { + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + egressIPStdout, err := e2ekubectl.RunKubectl("", "get", "eip", egressIPName, "-o", "json") + if err != nil { + framework.Logf("failed to fetch EgressIP %s status: %v", egressIPName, err) + return false, nil + } + + var eip egressIP + if err := json.Unmarshal([]byte(egressIPStdout), &eip); err != nil { + return false, fmt.Errorf("failed to unmarshal EgressIP %s status: %w", egressIPName, err) + } + + if len(eip.Status.Items) == 0 { + framework.Logf("EgressIP %s has no status items yet", egressIPName) + return false, nil + } + + for _, status := range eip.Status.Items { + if status.Node == nodeName { + return true, nil + } + } + framework.Logf("EgressIP %s not assigned to node %s yet (statuses: %+v)", egressIPName, nodeName, eip.Status.Items) + return false, nil + }) +} diff --git a/test/e2e/network_segmentation_policy.go b/test/e2e/network_segmentation_policy.go index 44f47598b9..0fc8216c59 100644 --- a/test/e2e/network_segmentation_policy.go +++ b/test/e2e/network_segmentation_policy.go @@ -207,6 +207,123 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", feature.Networ ), ) + ginkgo.DescribeTable( + "ICMP should bypass default deny policy for UDNs when enabled", + func( + netConfigParams networkAttachmentConfigParams, + clientPodConfig podConfiguration, + serverPodConfig podConfiguration, + ) { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("ICMP Network Policy bypass is not enabled, skipping ICMP bypass network policy tests") + } + + ginkgo.By("Creating the attachment configuration") + netConfig := newNetworkAttachmentConfig(netConfigParams) + netConfig.namespace = f.Namespace.Name + netConfig.cidr = filterCIDRsAndJoin(cs, netConfig.cidr) + _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( + context.Background(), + generateNAD(netConfig, f.ClientSet), + metav1.CreateOptions{}, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating client/server pods") + serverPodConfig.namespace = f.Namespace.Name + clientPodConfig.namespace = f.Namespace.Name + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) + framework.ExpectNoError(err, "") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].GetName()} + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[1].GetName()} + runUDNPod(cs, f.Namespace.Name, serverPodConfig, nil) + runUDNPod(cs, f.Namespace.Name, clientPodConfig, nil) + + ginkgo.By("creating a \"default deny\" network policy") + _, err = makeDenyAllPolicy(f, f.Namespace.Name, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + var serverIPs []string + for i, cidr := range strings.Split(netConfig.cidr, ",") { + if cidr == "" { + continue + } + serverIP, err := getPodAnnotationIPsForAttachmentByIndex( + cs, + f.Namespace.Name, + serverPodConfig.name, + namespacedName(f.Namespace.Name, netConfig.name), + i, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + serverIPs = append(serverIPs, 
serverIP) + } + gomega.Expect(serverIPs).NotTo(gomega.BeEmpty()) + + ginkgo.By("asserting the *client* pod can ping the server pod despite the default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP) + }, 1*time.Minute, 6*time.Second).Should(gomega.Succeed()) + } + + ginkgo.By("asserting the *client* pod can not reach the server pod HTTP endpoint due to default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + } + }, + ginkgo.Entry( + "in L2 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer2", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ginkgo.Entry( + "in L3 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer3", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ) + ginkgo.DescribeTable( "allow ingress traffic to one pod from a particular namespace", func( diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index c9335c2a95..11826272e8 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -898,814 +898,860 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork var ra *rav1.RouteAdvertisements var hostNetworkPort int - ginkgo.BeforeEach(func() { - ginkgo.By("Configuring primary UDN namespaces") - var err error - udnNamespaceA, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - f.Namespace = udnNamespaceA - udnNamespaceB, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.Context("", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { + ginkgo.BeforeAll(func() { + ginkgo.By("Configuring primary UDN namespaces") + var err error + // Create namespaces directly via the API instead of f.CreateNamespace() + // to avoid framework cleaning them up in AfterEach + udnNamespaceA, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + f.Namespace = udnNamespaceA + 
udnNamespaceB, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Configuring networks") - cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceA.Name}, - }}} - cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceB.Name}, - }}} + ginkgo.By("Configuring networks") + cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceA.Name}, + }}} + cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceB.Name}, + }}} - // set a common label used to advertise both networks with one RA - cudnATemplate.Labels["advertised-networks-isolation"] = "" - cudnBTemplate.Labels["advertised-networks-isolation"] = "" + // set a common label used to advertise both networks with one RA + cudnATemplate.Labels["advertised-networks-isolation"] = "" + cudnBTemplate.Labels["advertised-networks-isolation"] = "" - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnATemplate.Spec.Network.Layer3 != nil { - cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) - } - if cudnATemplate.Spec.Network.Layer2 != nil { - cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) - } - if cudnBTemplate.Spec.Network.Layer3 != nil { - cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) - } - if cudnBTemplate.Spec.Network.Layer2 != nil { - cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) - } + if cudnATemplate.Spec.Network.Layer3 != nil { + cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) + } + if cudnATemplate.Spec.Network.Layer2 != nil { + cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) + } + if cudnBTemplate.Spec.Network.Layer3 != nil { + cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) + } + if cudnBTemplate.Spec.Network.Layer2 != nil { + cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) + } - cudnA, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnA, err = 
udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Waiting for networks to be ready") - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + ginkgo.By("Waiting for networks to be ready") + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - ginkgo.By("Selecting 3 schedulable nodes") - nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) - // create host networked pod - ginkgo.By("Creating host network pods on each node") - // get random port in case the test retries and port is already in use on host node - min := 25000 - max := 25999 - hostNetworkPort = rand.Intn(max-min+1) + min - framework.Logf("Random host networked port chosen: %d", hostNetworkPort) - for _, node := range nodes.Items { - // this creates a udp / http netexec listener which is able to receive the "hostname" - // command. We use this to validate that each endpoint is received at least once - args := []string{ - "netexec", - fmt.Sprintf("--http-port=%d", hostNetworkPort), - fmt.Sprintf("--udp-port=%d", hostNetworkPort), + ginkgo.By("Selecting 3 schedulable nodes") + nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) + // create host networked pod + ginkgo.By("Creating host network pods on each node") + // get random port in case the test retries and port is already in use on host node + min := 25000 + max := 25999 + hostNetworkPort = rand.Intn(max-min+1) + min + framework.Logf("Random host networked port chosen: %d", hostNetworkPort) + + ginkgo.By("Setting up pods and services") + + // Create all pod specs upfront as distinct objects. 
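The BeforeAll in this hunk submits every pod to the API first and only waits for readiness afterwards, so scheduling proceeds in parallel instead of serializing on per-pod CreateSync calls. A minimal, self-contained sketch of that create-then-wait pattern, built on the same upstream e2e helpers the patch uses (the function name createAllThenWait and its arguments are illustrative, not part of this patch); the hunk resumes below with the actual pod specs.

package e2esketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

// createAllThenWait submits all pod specs before blocking on readiness, so the
// pods schedule and start concurrently.
func createAllThenWait(ctx context.Context, cs clientset.Interface, specs []*corev1.Pod) ([]*corev1.Pod, error) {
	pods := make([]*corev1.Pod, 0, len(specs))
	for _, spec := range specs {
		p, err := cs.CoreV1().Pods(spec.Namespace).Create(ctx, spec, metav1.CreateOptions{})
		if err != nil {
			return nil, err
		}
		pods = append(pods, p) // submitted; startup continues in the background
	}
	for i, p := range pods {
		if err := e2epod.WaitTimeoutForPodReadyInNamespace(ctx, cs, p.Name, p.Namespace, framework.PodStartTimeout); err != nil {
			return nil, err
		}
		// Re-read each pod so callers see status populated after readiness,
		// e.g. the assigned pod IPs.
		fresh, err := cs.CoreV1().Pods(p.Namespace).Get(ctx, p.Name, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
		pods[i] = fresh
	}
	return pods, nil
}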
+ var hostNetPods []*corev1.Pod + for _, node := range nodes.Items { + p := e2epod.NewAgnhostPod(f.Namespace.Name, node.Name+"-hostnet-ep", nil, nil, nil, + "netexec", + fmt.Sprintf("--http-port=%d", hostNetworkPort), + fmt.Sprintf("--udp-port=%d", hostNetworkPort)) + p.Spec.NodeName = node.Name + p.Spec.HostNetwork = true + hostNetPods = append(hostNetPods, e2epod.NewPodClient(f).Create(context.TODO(), p)) } - // create host networked Pods - _, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { - p.Spec.Containers[0].Args = args - p.Spec.HostNetwork = true - }) + podNetASpecs := []*corev1.Pod{ + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + } + for _, p := range podNetASpecs { + p.Spec.NodeName = nodes.Items[0].Name + p.Labels = map[string]string{"network": cudnA.Name} + } + podNetASpecs[2].Spec.NodeName = nodes.Items[1].Name - framework.ExpectNoError(err) - } + podNetBSpec := e2epod.NewAgnhostPod(udnNamespaceB.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetBSpec.Spec.NodeName = nodes.Items[1].Name + podNetBSpec.Labels = map[string]string{"network": cudnB.Name} - ginkgo.By("Setting up pods and services") - podsNetA = []*corev1.Pod{} - pod := e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") - pod.Spec.NodeName = nodes.Items[0].Name - pod.Labels = map[string]string{"network": cudnA.Name} - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - pod.Name = fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name) - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - pod.Name = fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name) - pod.Spec.NodeName = nodes.Items[1].Name - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, pod.Labels) - svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} - familyPolicy := corev1.IPFamilyPolicyPreferDualStack - svc.Spec.IPFamilyPolicy = &familyPolicy - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetA, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podNetDefaultSpec := e2epod.NewAgnhostPod("default", fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetDefaultSpec.Spec.NodeName = nodes.Items[1].Name + podNetDefaultSpec.Labels = map[string]string{"network": "default"} - pod.Name = fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name) - pod.Namespace = udnNamespaceB.Name - pod.Labels = map[string]string{"network": cudnB.Name} - podNetB = e2epod.PodClientNS(f, 
udnNamespaceB.Name).CreateSync(context.TODO(), pod) - framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + // Submit all pods to the API without waiting for readiness. + podsNetA = []*corev1.Pod{} + for _, p := range podNetASpecs { + podsNetA = append(podsNetA, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } + podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).Create(context.TODO(), podNetBSpec) + podNetDefault = e2epod.PodClientNS(f, "default").Create(context.TODO(), podNetDefaultSpec) - svc.Name = fmt.Sprintf("service-%s", cudnB.Name) - svc.Namespace = pod.Namespace - svc.Spec.Selector = pod.Labels - svcNodePortNetB, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Create services (don't need pods to be ready). + familyPolicy := corev1.IPFamilyPolicyPreferDualStack - pod.Name = fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name) - pod.Namespace = "default" - pod.Labels = map[string]string{"network": "default"} - podNetDefault = e2epod.PodClientNS(f, "default").CreateSync(context.TODO(), pod) + svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, map[string]string{"network": cudnA.Name}) + svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} + svc.Spec.IPFamilyPolicy = &familyPolicy + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svc.Name = "service-default" - svc.Namespace = "default" - svc.Spec.Selector = pod.Labels - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = fmt.Sprintf("service-%s", cudnB.Name) + svc.Namespace = udnNamespaceB.Name + svc.Spec.Selector = map[string]string{"network": cudnB.Name} + svcNodePortNetB, err = f.ClientSet.CoreV1().Services(udnNamespaceB.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in default namespace - svc.Name = "nodeport-default-etp-local" - svc.Spec.Type = corev1.ServiceTypeNodePort - svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal - svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = "service-default" + svc.Namespace = "default" + svc.Spec.Selector = map[string]string{"network": "default"} + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA - svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) - svc.Namespace = udnNamespaceA.Name - svc.Spec.Selector = map[string]string{"network": cudnA.Name} - svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // create one nodePort service with externalTrafficPolicy=Local in 
default namespace + svc.Name = "nodeport-default-etp-local" + svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Expose networks") - ra = &rav1.RouteAdvertisements{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "advertised-networks-isolation-ra", - }, - Spec: rav1.RouteAdvertisementsSpec{ - NetworkSelectors: apitypes.NetworkSelectors{ - apitypes.NetworkSelector{ - NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, - ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ - NetworkSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA + svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) + svc.Namespace = udnNamespaceA.Name + svc.Spec.Selector = map[string]string{"network": cudnA.Name} + svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for all pods to be ready (they've been scheduling in parallel). + for _, p := range append(hostNetPods, append(podsNetA, podNetB, podNetDefault)...) { + framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, p.Name, p.Namespace, framework.PodStartTimeout)) + } + // Re-get pods to have updated status (e.g. pod IPs). + for i, p := range podsNetA { + podsNetA[i], err = f.ClientSet.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + } + podNetB, err = f.ClientSet.CoreV1().Pods(podNetB.Namespace).Get(context.TODO(), podNetB.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + podNetDefault, err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Get(context.TODO(), podNetDefault.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Expose networks") + ra = &rav1.RouteAdvertisements{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "advertised-networks-isolation-ra", + }, + Spec: rav1.RouteAdvertisementsSpec{ + NetworkSelectors: apitypes.NetworkSelectors{ + apitypes.NetworkSelector{ + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + }, }, }, }, + NodeSelector: metav1.LabelSelector{}, + FRRConfigurationSelector: metav1.LabelSelector{}, + Advertisements: []rav1.AdvertisementType{ + rav1.PodNetwork, + }, }, - NodeSelector: metav1.LabelSelector{}, - FRRConfigurationSelector: metav1.LabelSelector{}, - Advertisements: []rav1.AdvertisementType{ - rav1.PodNetwork, - }, - }, - } - - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } - ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + raClient, err := raclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure route advertisement matching both networks was created successfully") 
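Both the removed and the re-indented code around this point gate the test on the RouteAdvertisements "Accepted" condition via the Eventually idiom that follows. Condensed as a standalone sketch (getConditions is a hypothetical stand-in for fetching the object and reading .Status.Conditions):

package e2esketch

import (
	"time"

	"github.com/onsi/gomega"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// waitAccepted polls until the "Accepted" condition reports Reason "Accepted".
// Returning "" while the condition is absent keeps the poll going instead of
// failing early.
func waitAccepted(getConditions func() []metav1.Condition) {
	gomega.Eventually(func() string {
		cond := meta.FindStatusCondition(getConditions(), "Accepted")
		if cond == nil {
			return "" // condition not reported yet; keep polling
		}
		return cond.Reason
	}, 30*time.Second, time.Second).Should(gomega.Equal("Accepted"))
}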
- gomega.Eventually(func() string { - ra, err := raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) - if err != nil { - return "" - } - condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") - if condition == nil { - return "" - } - return condition.Reason - }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure routes from UDNs are learned by the external FRR router") - serverContainerIPs := getBGPServerContainerIPs(f) - for _, serverContainerIP := range serverContainerIPs { - for _, node := range nodes.Items { - if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) - } else { - checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) - checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + ginkgo.By("ensure route advertisement matching both networks was created successfully") + gomega.Eventually(func() string { + ra, err := raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) + if err != nil { + return "" + } + condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") + if condition == nil { + return "" + } + return condition.Reason + }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + + ginkgo.By("ensure routes from UDNs are learned by the external FRR router") + serverContainerIPs := getBGPServerContainerIPs(f) + for _, serverContainerIP := range serverContainerIPs { + for _, node := range nodes.Items { + if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { + checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) + checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) + } else { + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + } } } - } - }) - - ginkgo.AfterEach(func() { - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + }) - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnB != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnB = nil - } - if cudnA != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + 
ginkgo.AfterAll(func() { + if udnNamespaceA != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + if udnNamespaceB != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnA = nil - } + if cudnB != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) + cudnB = nil + } + if cudnA != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) + cudnA = nil + } - if podNetDefault != nil { - err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - podNetDefault = nil - } + if podNetDefault != nil { + err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podNetDefault = nil + } - if svcNodePortNetDefault != nil { - err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortNetDefault = nil - } - if svcNodePortETPLocalDefault != nil { - err = f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + if svcNodePortNetDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortNetDefault = nil + } + if svcNodePortETPLocalDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortETPLocalDefault = nil + } + + raClient, err := raclientset.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortETPLocalDefault = nil - } - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if ra != nil { + err = raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ra = nil + } - if ra != nil { - err = raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ra = nil - } - }) + // Delete the namespaces manually since they were created directly + // via the API (not via f.CreateNamespace) to avoid framework's + // AfterEach cleanup. + if udnNamespaceA != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceA = nil + } + if udnNamespaceB != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceB = nil + } + }) - ginkgo.DescribeTable("connectivity between networks", - func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { - // checkConnectivity performs a curl command from a specified client (pod or node) - // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the - // client. - var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { - curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "2", "--insecure", targetAddress} - var out string - var err error - if clientNamespace != "" { - framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress) - stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " ")) - out = stdout + "\n" + stderr - if err != nil { - return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err) - } - } else { - framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) - out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) - if err != nil { - // out is empty on error and error contains out... - return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) + ginkgo.DescribeTable("connectivity between networks", + func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { + // checkConnectivity performs a curl command from a specified client (pod or node) + // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the + // client. 
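The checkConnectivity helper below is probed with Eventually for expected successes but Consistently for expected failures: one success proves reachability, while a deny verdict only means something if it holds for the whole window (which this patch shortens from 15s to 5s). A standalone sketch of that asymmetry (check is a hypothetical probe returning nil on success):

package e2esketch

import (
	"time"

	"github.com/onsi/gomega"
)

// expectReachable asserts one success within the window; expectBlocked asserts
// that every probe in the (shorter) window fails.
func expectReachable(check func() error) {
	gomega.Eventually(check, 30*time.Second, time.Second).Should(gomega.Succeed())
}

func expectBlocked(check func() error) {
	gomega.Consistently(check, 5*time.Second, time.Second).ShouldNot(gomega.Succeed())
}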
+ var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) {
+ curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "1", "--insecure", targetAddress}
+ var out string
+ var err error
+ if clientNamespace != "" {
+ framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress)
+ stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " "))
+ out = stdout + "\n" + stderr
+ if err != nil {
+ return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err)
+ }
+ } else {
+ framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress)
+ out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd)
+ if err != nil {
+ // out is empty on error and the error message contains the command output
+ return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err)
+ }
}
- }
- client := clientName
- if clientNamespace != "" {
- client = clientNamespace + "/" + client
- }
- framework.Logf("Connectivity check successful:'%s' -> %s", client, targetAddress)
- return out, nil
- }
- for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) {
- clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily)
- asyncAssertion := gomega.Eventually
- timeout := time.Second * 30
- if expectErr {
- // When the connectivity check is expected to fail it should be failing consistently
- asyncAssertion = gomega.Consistently
- timeout = time.Second * 15
+ client := clientName
+ if clientNamespace != "" {
+ client = clientNamespace + "/" + client
+ }
+ framework.Logf("Connectivity check successful: '%s' -> %s", client, targetAddress)
+ return out, nil
+ }
- asyncAssertion(func() error {
- out, err := checkConnectivity(clientName, clientNamespace, dst)
- if expectErr != (err != nil) {
- return fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out)
+ for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) {
+ clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily)
+ asyncAssertion := gomega.Eventually
+ timeout := time.Second * 30
+ if expectErr {
+ // When the connectivity check is expected to fail, it should fail consistently
+ asyncAssertion = gomega.Consistently
+ timeout = time.Second * 5
}
- if expectedOutput != "" {
- if !strings.Contains(out, expectedOutput) {
- return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out)
+ asyncAssertion(func() error {
+ out, err := checkConnectivity(clientName, clientNamespace, dst)
+ if expectErr != (err != nil) {
+ return fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out)
}
- }
- return nil
- }, timeout).Should(gomega.BeNil())
- }
- },
- ginkgo.Entry("pod to pod on the same network and same node should work",
- func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
- // podsNetA[0] and podsNetA[1] are on the same node
- clientPod := podsNetA[0]
- srvPod := podsNetA[1]
+ if expectedOutput != "" {
+ if !strings.Contains(out, expectedOutput) {
+ return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out)
+ }
+ }
+ return nil
+ }, timeout).Should(gomega.BeNil())
+ }
+ },
+ ginkgo.Entry("pod to pod on the same network and same
node should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[1] are on the same node + clientPod := podsNetA[0] + srvPod := podsNetA[1] - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod on the same network and different nodes should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientPod := podsNetA[0] - srvPod := podsNetA[2] - - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod connectivity on different networks and same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[2] and podNetB are on the same node - clientPod := podsNetA[2] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - // Test behavior depends on the ADVERTISED_UDN_ISOLATION_MODE environment variable: - // - "loose": Pod connectivity is allowed, test expects success - // - anything else (including unset): Treated as "strict", pod connectivity is blocked - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod on the same network and different nodes should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[2] are on different nodes + clientPod := podsNetA[0] + srvPod := podsNetA[2] - // With the above underlay routing configuration client pod can reach server pod. 
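Several entries in this table branch on the isolation mode documented in the comments above. A minimal sketch of that check factored into one place; isolationModeIsLoose is a hypothetical name and only the standard os import is assumed:

// isolationModeIsLoose reports whether advertised-UDN pod isolation is relaxed;
// any other value of the variable, including unset, is treated as "strict".
isolationModeIsLoose := func() bool {
	return os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose"
}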
- curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - - ginkgo.Entry("pod to pod connectivity on different networks and different nodes", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podNetB are on different nodes - clientPod := podsNetA[0] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod connectivity on different networks and same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[2] and podNetB are on the same node + clientPod := podsNetA[2] + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + // Test behavior depends on the ADVERTISED_UDN_ISOLATION_MODE environment variable: + // - "loose": Pod connectivity is allowed, test expects success + // - anything else (including unset): Treated as "strict", pod connectivity is blocked + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + // With the above underlay routing configuration client pod can reach server pod. 
+ curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), - curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podNetB are on the same node - clientPod := podNetDefault - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podsNetA[0] are on different nodes - clientPod := podNetDefault - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access a UDN service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the UDN should be able to access a service in the same network", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - err := true - out := curlConnectionTimeoutCode - if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { - // FIXME: prevent looping of traffic in L2 UDNs - // bad behaviour: packet is looping from management port -> breth0 -> GR -> management port -> breth0 and so on - // which is a never ending loop - // this causes curl timeout with code 7 host unreachable instead of code 28 - out = "" - } - return podsNetA[0].Name, 
podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err - }), - ginkgo.Entry("pod in the UDN should be able to access kapi in default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, "https://kubernetes.default/healthz", "", false - }), - ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // Get kubernetes service from default namespace - kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) - framework.ExpectNoError(err, "should be able to get kubernetes service") - - // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver - // Today the kubernetes.default service is single-stack and cannot be dual-stack. - if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 { - e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes") - } - // Get the cluster IP for the specified IP family - clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs) - gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily)) - - // Access the kubernetes API at the cluster IP directly on port 443 - return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a local UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientNode := podsNetA[0].Spec.NodeName - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a different node UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientNode := podsNetA[2].Spec.NodeName - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("UDN pod to local node should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, 
expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - // FIXME: add the host process socket to the VRF for this test to work. - // This scenario is something that is not supported yet. So the test will continue to fail. - // This works the same on both normal UDNs and advertised UDNs. - // So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So its not code 28 but code7. - // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 - // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true - }), - ginkgo.Entry("UDN pod to a different node should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } + ginkgo.Entry("pod to pod connectivity on different networks and different nodes", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podNetB are on different nodes + clientPod := podsNetA[0] + srvPod := podNetB - clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) - clientNodeIP := clientNodeIPv4 - if ipFamily == utilnet.IPv6 { - clientNodeIP = clientNodeIPv6 - } - // pod -> node traffic should use the node's IP as the source for advertised UDNs. 
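The FIXME above notes that the host process socket would have to join the VRF for that flow to succeed. As a rough illustration of what that usually means on Linux (not part of the test), a listener can be bound to a VRF device with SO_BINDTODEVICE, which requires CAP_NET_RAW; "udn-vrf" is a placeholder device name and the golang.org/x/sys/unix package is assumed:

lc := net.ListenConfig{
	Control: func(network, address string, c syscall.RawConn) error {
		var sockErr error
		if err := c.Control(func(fd uintptr) {
			// Bind the socket to the VRF device so traffic arriving over the VRF reaches it.
			sockErr = unix.SetsockoptString(int(fd), unix.SOL_SOCKET, unix.SO_BINDTODEVICE, "udn-vrf")
		}); err != nil {
			return err
		}
		return sockErr
	},
}
ln, err := lc.Listen(context.Background(), "tcp", ":8080") // ln only accepts connections routed through udn-vrf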
- return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. - // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on the same node. - // client is on nodes[0]. Let's hit nodeport on nodes[0]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. - // Just check that the connection is successful. - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on a different node. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. - // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. - // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB - // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> - // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. - // Then it hits the pkt_mark flows on breth0 and get's sent into networkB's patchport where it hits the GR. - // On the GR we DNAT to backend pod and SNAT to joinIP. - // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply - // fails as it doesn't know how to reach this masqueradeIP. - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by podNetB. - // We want to hit the nodeport on a different node from the client. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false - }), - - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. 
- out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podNetB are on the same node + clientPod := podNetDefault + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podsNetA[0] are on different nodes + clientPod := podNetDefault + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access a UDN service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the UDN should be able to access a service in the same network", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false + }), + ginkgo.Entry("pod in the UDN should not be able to access a default network service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + err := true + out := curlConnectionTimeoutCode + if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { + // FIXME: prevent looping of traffic in L2 UDNs + // bad behaviour: packet is 
looping from management port -> breth0 -> GR -> management port -> breth0 and so on
+ // which is a never-ending loop
+ // this causes curl to fail with code 7 (host unreachable) instead of timing out with code 28
+ out = ""
+ }
+ return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err
+ }),
+ ginkgo.Entry("pod in the UDN should be able to access kapi in default network service",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ return podsNetA[0].Name, podsNetA[0].Namespace, "https://kubernetes.default/healthz", "", false
+ }),
+ ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ // Get the kubernetes service from the default namespace
+ kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{})
+ framework.ExpectNoError(err, "should be able to get kubernetes service")
+
+ // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver
+ // Today the kubernetes.default service is single-stack and cannot be dual-stack.
+ if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 {
+ e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes")
+ }
+ // Get the cluster IP for the specified IP family
+ clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs)
+ gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily))
+
+ // Access the kubernetes API at the cluster IP directly on port 443
+ return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false
+ }),
+ ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip",
+ curlConnectionTimeoutCode, true
+ }),
+ ginkgo.Entry("host to a local UDN pod should not work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientNode := podsNetA[0].Spec.NodeName
+ srvPod := podsNetA[0]
+
+ srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name))
+ framework.ExpectNoError(err)
+ return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip",
+ curlConnectionTimeoutCode, true
+ }),
+ ginkgo.Entry("host to a different node UDN pod should not work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ // podsNetA[0] and podsNetA[2] are on different nodes
+ clientNode := podsNetA[2].Spec.NodeName
+ srvPod := podsNetA[0]
+
+ srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name))
+ framework.ExpectNoError(err)
+ return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily,
srvPodStatus.IPs).IP.String(), "8080") + "/clientip",
+ curlConnectionTimeoutCode, true
+ }),
+ ginkgo.Entry("UDN pod to local node should not work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ // FIXME: add the host process socket to the VRF for this test to work.
+ // This scenario is not supported yet, so the test will continue to fail.
+ // This works the same on both normal UDNs and advertised UDNs.
+ // Because the process is not bound to the VRF, the packet reaches the host but the kernel sends a RESET, so curl fails with code 7 rather than code 28.
+ // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264,
+ // offset 0, flags [DF], proto TCP (6), length 60)
+ // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280,
+ // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
+ // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264,
+ // offset 0, flags [DF], proto TCP (6), length 60)
+ // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280,
+ // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0
+ // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
+ // offset 0, flags [DF], proto TCP (6), length 40)
+ // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0
+ // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0,
+ // offset 0, flags [DF], proto TCP (6), length 40)
+ // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true
+ }),
+ ginkgo.Entry("UDN pod to a different node should work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6 }
- }
- return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool
- }),
- ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work",
- func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
- // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419
- clientPod := podNetB
- node, err :=
f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) + clientNodeIP := clientNodeIPv4 + if ipFamily == utilnet.IPv6 { + clientNodeIP = clientNodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. - // So we hit nodeport on nodes[1]. 
- clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // pod -> node traffic should use the node's IP as the source for advertised UDNs. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on the same node. + // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. + // Just check that the connection is successful. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on a different node. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. - // So we hit nodeport on nodes[0]. - clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. 
- out = curlConnectionTimeoutCode
- errBool = true
- if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) {
- out = curlConnectionResetCode
+ // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks.
+ // The reason it doesn't work today is that UDN networks don't have MAC bindings for the masqueradeIPs of other networks.
+ // Traffic flow: UDN pod in network A -> same-node nodeIP:nodePort service of networkB
+ // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface ->
+ // enters the host and hits IPTables rules to DNAT to the clusterIP:Port of the networkB service.
+ // Then it hits the pkt_mark flows on breth0 and gets sent into networkB's patchport where it hits the GR.
+ // On the GR we DNAT to the backend pod and SNAT to the joinIP.
+ // Reply: the pod replies, and now OVN in networkB tries to ARP for the masqueradeIP of networkA, which is the source, and simply
+ // fails as it doesn't know how to reach this masqueradeIP.
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6 }
- }
- return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool
- }),
- )
+ nodePort := svcNodePortNetB.Spec.Ports[0].NodePort
+ // sourceIP will be joinSubnetIP for nodeports, so only using the hostname endpoint
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true
+ }),
+ ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ // The service is backed by podNetB.
+ // We want to hit the nodeport on a different node from the client.
+ // client is on nodes[0]. Let's hit nodeport on nodes[2].
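The nodeIPv4/nodeIPv6 selection used in the entry above recurs in almost every nodeport entry in this table. A minimal sketch of a helper that could replace it, reusing the suite's existing getNodeAddresses; nodeIPForFamily is a hypothetical name and the file's corev1 import (aliased v1) is assumed:

// nodeIPForFamily returns the node address matching the requested IP family.
nodeIPForFamily := func(node *v1.Node, ipFamily utilnet.IPFamily) string {
	nodeIPv4, nodeIPv6 := getNodeAddresses(node)
	if ipFamily == utilnet.IPv6 {
		return nodeIPv6
	}
	return nodeIPv4
}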
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePort := svcNodePortNetB.Spec.Ports[0].NodePort
+
+ // sourceIP will be joinSubnetIP for nodeports, so only using the hostname endpoint
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false
+ }),
+ ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false
+ }),
+
+ ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podsNetA[0]
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort
+ out := ""
+ errBool := false
+ // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414
+ // There is a new option on OVN 25.03 and later called "ct-commit-all" that can be set for each LR.
+ // This should avoid the mentioned issue.
+ if IsGatewayModeLocal(f.ClientSet) {
+ // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846
+ // it's supposed to fail with error code 56, which is fine,
+ // but due to this fwmark bug it ends up failing with error code 28, which is not expected.
+ out = curlConnectionTimeoutCode
+ errBool = true
+ if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) {
+ out = curlConnectionResetCode
+ }
+ }
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool
+ }),
+
+ ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419
+ clientPod := podNetB
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true
+ }),
+ ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work",
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) {
+ clientPod := podNetB
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort
+ out := ""
+ errBool := false
+
+ // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414
+ // There is a new option on OVN 25.03 and later called "ct-commit-all" that can be set for each LR.
+ // This should avoid the mentioned issue.
+ if IsGatewayModeLocal(f.ClientSet) {
+ // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846
+ // it's supposed to fail with error code 56, which is fine,
+ // but due to this fwmark bug it ends up failing with error code 28, which is not expected.
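This IsGatewayModeLocal expectation block appears four times across the [ETP=LOCAL] entries. A sketch of the expected-result logic consolidated into one helper, reusing the suite's IsGatewayModeLocal, isIPv4Supported, and curl exit-code constants; expectedETPLocalLGWResult is a hypothetical name:

// expectedETPLocalLGWResult returns the expected curl output and whether an
// error is expected: success outside local gateway mode, otherwise a reset
// (code 56) on IPv4 and on IPv6-only clusters, and a timeout (code 28) for
// IPv6 on dual-stack clusters, per the fwmark FIXME above.
expectedETPLocalLGWResult := func(ipFamily utilnet.IPFamily) (string, bool) {
	if !IsGatewayModeLocal(f.ClientSet) {
		return "", false
	}
	out := curlConnectionTimeoutCode
	if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) {
		out = curlConnectionResetCode
	}
	return out, true
}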
+ out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // So we hit nodeport on nodes[1]. + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetDefault + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. + // So we hit nodeport on nodes[0]. 
+ clientPod := podNetDefault
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{})
+ framework.ExpectNoError(err)
+ nodeIPv4, nodeIPv6 := getNodeAddresses(node)
+ nodeIP := nodeIPv4
+ if ipFamily == utilnet.IPv6 {
+ nodeIP = nodeIPv6
+ }
+ nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort
+ out := ""
+ errBool := false
+
+ // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414
+ // OVN 25.03 and later add an option called "ct-commit-all" that can be set on each LR.
+ // Setting it should avoid the issue mentioned above.
+ if IsGatewayModeLocal(f.ClientSet) {
+ // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846
+ // it's supposed to fail with error code 56, which is fine,
+ // but due to this fwmark bug it ends up failing with error code 28, which is not expected.
+ out = curlConnectionTimeoutCode
+ errBool = true
+ if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) {
+ out = curlConnectionResetCode
+ }
+ }
+ return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool
+ }),
+ )
+ })
 },
 ginkgo.Entry("Layer3",
@@ -1879,6 +1925,19 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA
 // isolation doesn't cut it. macvlan driver might be a better option.
 bgpServerSubnetIPv4 = "172.38.0.0/16"
 bgpServerSubnetIPv6 = "fc00:f853:ccd:38::/64"
+ // Additional subnets used in nested "When there is other network" tests
+ otherBGPPeerSubnetIPv4 = "172.136.0.0/16"
+ otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64"
+ otherBGPServerSubnetIPv4 = "172.138.0.0/16"
+ otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64"
+ )
+
+ // staleSubnets lists all subnets that may be left behind if a test times out during cleanup.
+ staleSubnets := sets.New(
+ bgpPeerSubnetIPv4, bgpPeerSubnetIPv6,
+ bgpServerSubnetIPv4, bgpServerSubnetIPv6,
+ otherBGPPeerSubnetIPv4, otherBGPPeerSubnetIPv6,
+ otherBGPServerSubnetIPv4, otherBGPServerSubnetIPv6,
 )
 f := wrappedTestFramework(baseName)
@@ -1898,6 +1957,21 @@
 testNetworkName = testBaseName
 bgpServerName = testNetworkName + "-bgpserver"
+ // Clean up any stale networks from previous test attempts that may have failed during cleanup.
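+ // Matching is done on subnets rather than on network names, since a leftover
+ // network still holding one of these subnets would prevent this run from
+ // recreating its networks on the same ranges.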
+ networkNames, err := infraprovider.Get().ListNetworks()
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ for _, name := range networkNames {
+ network, err := infraprovider.Get().GetNetwork(name)
+ if err != nil {
+ continue
+ }
+ v4, v6, _ := network.IPv4IPv6Subnets()
+ if staleSubnets.Has(v4) || staleSubnets.Has(v6) {
+ framework.Logf("Cleaning up stale network %q with subnets %s/%s", name, v4, v6)
+ gomega.Expect(ictx.DeleteNetwork(network)).To(gomega.Succeed())
+ }
+ }
+
 // we will create a agnhost server on an extra network peered with BGP
 ginkgo.By("Running a BGP network with an agnhost server")
 bgpPeerCIDRs := []string{bgpPeerSubnetIPv4, bgpPeerSubnetIPv6}
@@ -2233,12 +2307,8 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA
 ginkgo.Describe("When there is other network", func() {
 const (
- otherBGPPeerSubnetIPv4 = "172.136.0.0/16"
- otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64"
- otherBGPServerSubnetIPv4 = "172.138.0.0/16"
- otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64"
- otherUDNCIDRv4 = "103.203.0.0/16"
- otherUDNCIDRv6 = "2014:200:200::0/60"
+ otherUDNCIDRv4 = "103.203.0.0/16"
+ otherUDNCIDRv6 = "2014:200:200::0/60"
 )
 var (
diff --git a/test/e2e/service.go b/test/e2e/service.go
index 2180fcc595..1de696e6e0 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -830,6 +830,151 @@ var _ = ginkgo.Describe("Services", feature.Service, func() {
 // network is removed by provider Context API
 })
+ ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a NodePort service via a different node", func(ctx context.Context) {
+ const (
+ serviceName = "svc-udp"
+ srcPort = 12345
+ podClient = "pod-client"
+ podBackend1 = "pod-server-1"
+ podBackend2 = "pod-server-2"
+ )
+ var clientNodeInfo, serverNodeInfo, backendNodeInfo nodeInfo
+
+ cs := f.ClientSet
+ ns := f.Namespace.Name
+
+ nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 3)
+ framework.ExpectNoError(err)
+ if len(nodes.Items) < 3 {
+ e2eskipper.Skipf(
+ "Test requires >= 3 Ready nodes, but there are only %v nodes",
+ len(nodes.Items))
+ }
+
+ family := v1.IPv4Protocol
+ if IsIPv6Cluster(cs) {
+ family = v1.IPv6Protocol
+ }
+
+ ips := e2enode.GetAddressesByTypeAndFamily(&nodes.Items[0], v1.NodeInternalIP, family)
+ gomega.Expect(ips).ToNot(gomega.BeEmpty())
+
+ clientNodeInfo = nodeInfo{
+ name: nodes.Items[0].Name,
+ nodeIP: ips[0],
+ }
+
+ ips = e2enode.GetAddressesByTypeAndFamily(&nodes.Items[1], v1.NodeInternalIP, family)
+ gomega.Expect(ips).ToNot(gomega.BeEmpty())
+
+ backendNodeInfo = nodeInfo{
+ name: nodes.Items[1].Name,
+ nodeIP: ips[0],
+ }
+
+ ips = e2enode.GetAddressesByTypeAndFamily(&nodes.Items[2], v1.NodeInternalIP, family)
+ gomega.Expect(ips).ToNot(gomega.BeEmpty())
+
+ serverNodeInfo = nodeInfo{
+ name: nodes.Items[2].Name,
+ nodeIP: ips[0],
+ }
+
+ // Create a NodePort service
+ udpJig := e2eservice.NewTestJig(cs, ns, serviceName)
+ ginkgo.By("creating a UDP service " + serviceName + " with type=NodePort in " + ns)
+ udpService, err := udpJig.CreateUDPService(ctx, func(svc *v1.Service) {
+ svc.Spec.Type = v1.ServiceTypeNodePort
+ svc.Spec.Ports = []v1.ServicePort{
+ {Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)},
+ }
+ })
+ framework.ExpectNoError(err)
+
+ // Create a client pod on one node that sends UDP traffic to the NodePort service every 5 seconds
+ ginkgo.By("creating a client pod for probing the service " + serviceName)
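+ // The client pins its source port via nc's -p flag, so every probe reuses the
+ // same UDP 5-tuple and therefore the same conntrack entry; this is what should
+ // expose a stale entry once the backend pod is replaced.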
+ clientPod := e2epod.NewAgnhostPod(ns, podClient, nil, nil, nil)
+ nodeSelection := e2epod.NodeSelection{Name: clientNodeInfo.name}
+ e2epod.SetNodeSelection(&clientPod.Spec, nodeSelection)
+ cmd := fmt.Sprintf(`date; for i in $(seq 1 3000); do echo "$(date) Try: ${i}"; echo hostname | nc -u -w 5 -p %d %s %d; echo; done`, srcPort, serverNodeInfo.nodeIP, udpService.Spec.Ports[0].NodePort)
+ clientPod.Spec.Containers[0].Command = []string{"/bin/sh", "-c", cmd}
+ clientPod.Spec.Containers[0].Name = podClient
+ e2epod.NewPodClient(f).CreateSync(ctx, clientPod)
+
+ // Read the client pod logs
+ logs, err := e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient)
+ framework.ExpectNoError(err)
+ framework.Logf("Pod client logs: %s", logs)
+
+ // Add a backend pod to the service on the other node
+ ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName + " at node " + backendNodeInfo.name)
+ serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80))
+ serverPod1.Labels = udpJig.Labels
+ nodeSelection = e2epod.NodeSelection{Name: backendNodeInfo.name}
+ e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection)
+ e2epod.NewPodClient(f).CreateSync(ctx, serverPod1)
+
+ ginkgo.By("Waiting for the endpoint to be ready")
+ err = framework.WaitForServiceEndpointsNum(ctx, f.ClientSet, f.Namespace.Name,
+ serviceName, 1, time.Second, wait.ForeverTestTimeout)
+ framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
+ serviceName, f.Namespace.Name)
+
+ logContainsFn := func(text, podName string) wait.ConditionWithContextFunc {
+ return func(ctx context.Context) (bool, error) {
+ logs, err := e2epod.GetPodLogs(ctx, cs, ns, podName, podName)
+ if err != nil {
+ // Retry on the next poll.
+ return false, nil
+ }
+ if !strings.Contains(string(logs), text) {
+ return false, nil
+ }
+ return true, nil
+ }
+ }
+ // Note that the fact that the Endpoints object already exists does NOT mean
+ // that the OpenFlow flows have already been programmed.
+ // Additionally, take into account that the UDP conntrack entry timeout
+ // is 30 seconds by default.
+ // Based on the above, check whether the pod receives the traffic.
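+ // Poll for up to a minute, which is longer than both the flow programming
+ // latency and the 30 second conntrack timeout mentioned above.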
+ ginkgo.By("checking client pod connected to the backend 1 on Node IP " + serverNodeInfo.nodeIP) + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, logContainsFn(podBackend1, podClient)); err != nil { + logs, err = e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient) + framework.ExpectNoError(err) + framework.Logf("Pod client logs: %s", logs) + framework.Failf("Failed to connect to backend 1") + } + + // Create a second pod + ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName + " at node " + backendNodeInfo.name) + serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) + serverPod2.Labels = udpJig.Labels + nodeSelection = e2epod.NodeSelection{Name: backendNodeInfo.name} + e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) + e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) + + // and delete the first pod + framework.Logf("Cleaning up %s pod", podBackend1) + e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) + + ginkgo.By("Waiting for the endpoint to be ready") + err = framework.WaitForServiceEndpointsNum(ctx, f.ClientSet, f.Namespace.Name, + serviceName, 1, time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", + serviceName, f.Namespace.Name) + + // Check that the second pod keeps receiving traffic + // UDP conntrack entries timeout is 30 sec by default + ginkgo.By("checking client pod connected to the backend 2 on Node IP " + serverNodeInfo.nodeIP) + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, logContainsFn(podBackend2, podClient)); err != nil { + logs, err = e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient) + framework.ExpectNoError(err) + framework.Logf("Pod client logs: %s", logs) + framework.Failf("Failed to connect to backend 2") + } + }) + ginkgo.It("should listen on each host addresses", func() { endPoints := make([]*v1.Pod, 0) endpointsSelector := map[string]string{"servicebackend": "true"} diff --git a/test/e2e/util.go b/test/e2e/util.go index 08a9568d2d..df01c07b24 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1243,6 +1243,7 @@ func wrappedTestFramework(basename string) *framework.Framework { func newPrivelegedTestFramework(basename string) *framework.Framework { f := framework.NewDefaultFramework(basename) f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged + f.NamespacePodSecurityWarnLevel = admissionapi.LevelPrivileged f.DumpAllNamespaceInfo = func(ctx context.Context, f *framework.Framework, namespace string) { debug.DumpAllNamespaceInfo(context.TODO(), f.ClientSet, namespace) } @@ -1439,11 +1440,22 @@ func isNetworkSegmentationEnabled() bool { return present && val == "true" } +func isICMPNetworkPolicyBypassEnabled() bool { + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + val := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), "OVN_ALLOW_ICMP_NETPOL") + return val == "true" +} + func isLocalGWModeEnabled() bool { val, present := os.LookupEnv("OVN_GATEWAY_MODE") return present && val == "local" } +func isHelmEnabled() bool { + val, present := os.LookupEnv("USE_HELM") + return present && val == "true" +} + func isPreConfiguredUdnAddressesEnabled() bool { ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() val := getTemplateContainerEnv(ovnKubeNamespace, 
"daemonset/ovnkube-node", getNodeContainerName(), "OVN_PRE_CONF_UDN_ADDR_ENABLE") diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index b9be17fd9f..e9cee42c41 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -187,6 +187,11 @@ else # https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5569 skip "Multi Homing" fi + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ "$PLATFORM_IPV6_SUPPORT" == false ]; then + # Skip IPv6/dual-stack multihoming secondary network tests in IPv4-only clusters. + skip "Multi Homing.*L3 - routed - secondary network with IPv6 subnet" + skip "Multi Homing.*L3 - routed - secondary network with a dual stack configuration" + fi # these tests require metallb but the configuration we do for it is not compatible with the configuration we do to advertise the default network # TODO: consolidate configuration skip "Load Balancer Service Tests with MetalLB" @@ -225,6 +230,20 @@ if [ "${PARALLEL:-false}" = "true" ]; then skip_label "$SERIAL_LABEL" fi +if [ "$ENABLE_NO_OVERLAY" == true ]; then + # No-overlay mode uses underlying network infrastructure directly. + # Overlay-dependent features are not supported. + skip_label "Feature:Multicast" + skip_label "Feature:EgressIP" + skip_label "Feature:EgressService" + # This test validates MTU reduction behavior specific to overlay mode (1500->1400). + # In no-overlay mode, pods use the full underlying network MTU without reduction. + skip "blocking ICMP needs frag" + # This test validates MTU reduction due to Geneve encapsulation overhead (1400->1342). + # In no-overlay mode, there is no encapsulation and thus no MTU overhead. + skip "Pod to pod TCP with low MTU" +fi + # setting these is required to make RuntimeClass tests work ... :/ export KUBE_CONTAINER_RUNTIME=remote export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock