diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index edd80849b2..e28f96a64e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -16,10 +16,24 @@ env: UBUNTU_IMAGE_NAME: ovn-kube-ubuntu BUILDER_IMAGE: quay.io/projectquay/golang:1.24 jobs: - build: - name: Build Images - runs-on: ubuntu-latest + # Build Fedora image for each platform + build-fedora: + name: Build Fedora (${{ matrix.platform }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: true + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + - name: Check out code into the Go module directory uses: actions/checkout@v4 @@ -39,8 +53,8 @@ jobs: with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - + password: ${{ secrets.GITHUB_TOKEN }} + - name: Set up environment run: | export GOPATH=$(go env GOPATH) @@ -64,23 +78,19 @@ jobs: pushd dist/images echo "ref: ${BRANCH} commit: ${COMMIT}" > git_info popd - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - platforms: all - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v3 - name: Extract metadata (tags, labels) for fedora ovn-k image - id: meta-fedora + id: meta uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }} - name: Build and push Fedora based Docker image + id: build uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} @@ -89,23 +99,201 @@ jobs: push: true build-args: | BUILDER_IMAGE=${{ env.BUILDER_IMAGE }} - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta-fedora.outputs.tags }} - labels: ${{ steps.meta-fedora.outputs.labels }} + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha,scope=fedora-${{ env.PLATFORM_PAIR }} + cache-to: type=gha,mode=max,scope=fedora-${{ env.PLATFORM_PAIR }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-fedora-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + # Merge Fedora multi-platform images + merge-fedora: + name: Merge Fedora + runs-on: ubuntu-latest + needs: build-fedora + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-fedora-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the GH Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for fedora ovn-k image + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.FEDORA_IMAGE_NAME }}:${{ steps.meta.outputs.version }} + + # Build Ubuntu image for each platform + build-ubuntu: + name: Build Ubuntu (${{ matrix.platform }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: true + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go-controller/go.mod' + # Disabling cache to avoid warnings until these two issues are fixed + # https://github.com/actions/setup-go/issues/424 + # https://github.com/actions/setup-go/issues/403 + # cache-dependency-path: "**/*.sum" + cache: false + id: go + + - name: Log in to the GH Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up environment + run: | + export GOPATH=$(go env GOPATH) + echo "GOPATH=$GOPATH" >> $GITHUB_ENV + echo "$GOPATH/bin" >> $GITHUB_PATH + + - name: Build ovnkube-binaries copy to context + run: | + pushd go-controller + make + popd + + pushd dist/images + cp -r ../../go-controller/_output/go/bin/* . + popd + + - name: Generate git-info to write to image + run: | + BRANCH=$(git rev-parse --short "$GITHUB_SHA") + COMMIT=$(git rev-parse HEAD) + pushd dist/images + echo "ref: ${BRANCH} commit: ${COMMIT}" > git_info + popd + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 - name: Extract metadata (tags, labels) for ubuntu ovn-k image - id: meta-ubuntu + id: meta uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }} - name: Build and push Ubuntu based Docker image + id: build uses: docker/build-push-action@v5 with: builder: ${{ steps.buildx.outputs.name }} context: ./dist/images file: ./dist/images/Dockerfile.ubuntu push: true - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta-ubuntu.outputs.tags }} - labels: ${{ steps.meta-ubuntu.outputs.labels }} + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha,scope=ubuntu-${{ env.PLATFORM_PAIR }} + cache-to: type=gha,mode=max,scope=ubuntu-${{ env.PLATFORM_PAIR }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-ubuntu-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + # Merge Ubuntu multi-platform images + merge-ubuntu: + name: Merge Ubuntu + runs-on: ubuntu-latest + needs: build-ubuntu + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-ubuntu-* + 
merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the GH Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for ubuntu ovn-k image + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPOSITORY }}/${{ env.UBUNTU_IMAGE_NAME }}:${{ steps.meta.outputs.version }} \ No newline at end of file diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml index b9fbdbd50e..9369e659a2 100644 --- a/.github/workflows/performance-test.yml +++ b/.github/workflows/performance-test.yml @@ -44,6 +44,7 @@ jobs: # target: ["shard-conformance", "control-plane", "multi-homing", "multi-node-zones", "node-ip-mac-migration", "compact-mode", "serial"] # shard-conformance: hybrid-overlay = multicast-enable = emptylb-enable = false # control-plane: hybrid-overlay = multicast-enable = emptylb-enable = true + # perf-test: ["all","kubelet-density-cni", "udn-density-l2-noPods", "cudn-density-l2-noPods"] # ha: ["HA", "noHA"] # gateway-mode: ["local", "shared"] # ipfamily: ["ipv4", "ipv6", "dualstack"] @@ -57,7 +58,10 @@ jobs: # network-segmentation : ["", "enable-network-segmentation"] # traffic-flow-tests : "" include: - - {"target": "node-density-cni", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""} + - {"target": "control-plane", perf-test: "kubelet-density-cni", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""} + - {"target": "control-plane", perf-test: "udn-density-l2-noPods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""} + - {"target": "control-plane", perf-test: "cudn-density-l2-noPods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""} + - {"target": "control-plane", perf-test: "udn-density-l2-pods", "ha": "HA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3", "network-segmentation": ""} env: ES_SERVER: "${{ secrets.PERF_DATASTORE }}" JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}-${{ matrix.ic }}" @@ -66,11 +70,11 @@ jobs: OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || 
matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" OVN_HA: "${{ matrix.ha == 'HA' }}" OVN_DISABLE_SNAT_MULTIPLE_GWS: "${{ matrix.disable-snat-multiple-gws == 'noSnatGW' }}" - KIND_INSTALL_METALLB: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || matrix.target == 'network-segmentation' }}" + KIND_INSTALL_METALLB: "false" OVN_GATEWAY_MODE: "${{ matrix.gateway-mode }}" OVN_SECOND_BRIDGE: "${{ matrix.second-bridge == '2br' }}" - ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' || matrix.target == 'kv-live-migration' || matrix.target == 'network-segmentation' || matrix.target == 'tools' || matrix.target == 'multi-homing-helm' || matrix.target == 'traffic-flow-test-only' || matrix.routeadvertisements != '' }}" - ENABLE_NETWORK_SEGMENTATION: "${{ matrix.target == 'network-segmentation' || matrix.network-segmentation == 'enable-network-segmentation' }}" + ENABLE_MULTI_NET: true + ENABLE_NETWORK_SEGMENTATION: true PLATFORM_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" PLATFORM_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" KIND_INSTALL_KUBEVIRT: "${{ matrix.target == 'kv-live-migration' }}" @@ -98,9 +102,17 @@ jobs: KIND_INSTALL_PROMETHEUS: "true" KIND_PROMETHEUS_INFRA_ONLY: "true" METRICS_IP: "127.0.0.1" + GH_TOKEN: "${{ secrets.GITHUB_TOKEN }}" steps: - uses: actions/checkout@v4 + # Debug session for the performance test + #- name: Setup tmate session + # id: tmate + # uses: mxschmitt/action-tmate@v3 + # with: + # detached: true + - name: Get PR info for issue comment if: github.event_name == 'issue_comment' id: pr_info @@ -281,7 +293,7 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: prometheus-install-logs + name: prometheus-install-logs-${{ matrix.perf-test }}-${{ github.run_id }} path: prometheus-install.log retention-days: 7 @@ -290,13 +302,8 @@ jobs: curl -L https://github.com/kube-burner/kube-burner/releases/download/v2.1.0/kube-burner-V2.1.0-linux-x86_64.tar.gz | tar xz chmod +x kube-burner sudo mv kube-burner /usr/local/bin/ - echo "KUBE_BURNER_VERSION=${KUBE_BURNER_VERSION}" >> $GITHUB_ENV - - name: git clone kube-burner repo - run: | - git clone http://github.com/kube-burner/kube-burner - - - name: Run kube-burner kubelet-density test + - name: "Run kube-burner ${{ matrix.perf-test }} workload" timeout-minutes: 120 run: | kind get kubeconfig > kconfig @@ -308,121 +315,71 @@ jobs: # Make sure the port-forward is up prior to running the workload sleep 30 - cp ./contrib/perf/metric-endpoint.yml kube-burner/examples/workloads/kubelet-density-cni - cp -f ./contrib/perf/workloads/kubelet-density-cni.yml kube-burner/examples/workloads/kubelet-density-cni/ - cp ./contrib/perf/performance-meta.yml kube-burner/examples/workloads/kubelet-density-cni - cp ./contrib/perf/metric-endpoint-local.yml kube-burner/examples/workloads/kubelet-density-cni - cp ./contrib/perf/metrics.yml kube-burner/examples/workloads/kubelet-density-cni - - cd kube-burner/examples/workloads/kubelet-density-cni + cd contrib/perf #Generate metadata. 
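The consolidated run step below is what makes the matrix generic: every entry selects its workload config by name, so one job definition covers all four perf-tests. Spelled out for the kubelet-density-cni entry with no ES_SERVER configured (paths follow the contrib/perf layout this PR introduces):

    cd contrib/perf
    envsubst < performance-meta.yml > perf-meta.yml   # fills $JOB_NAME etc. from the job env
    kube-burner init --config workloads/kubelet-density-cni.yml \
      -e metric-endpoint-local.yml --user-metadata perf-meta.yml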
envsubst < performance-meta.yml > perf-meta.yml if [[ -z "${ES_SERVER}" ]]; then - kube-burner init --config kubelet-density-cni.yml -e metric-endpoint-local.yml --user-metadata perf-meta.yml + kube-burner init --config workloads/${{ matrix.perf-test }}.yml -e metric-endpoint-local.yml --user-metadata perf-meta.yml else - kube-burner init --config kubelet-density-cni.yml -e metric-endpoint.yml --user-metadata perf-meta.yml + kube-burner init --config workloads/${{ matrix.perf-test }}.yml -e metric-endpoint.yml --user-metadata perf-meta.yml fi - mkdir /tmp/pprof-data - cp pprof-data/* /tmp/pprof-data - - - name: Export kube-burner data - if: always() - run: | - mkdir -p /tmp/kube-burner - cp -r kube-burner/* /tmp/kube-burner/ + mkdir -p /tmp/${{ matrix.perf-test }}/pprof-data + mkdir -p /tmp/${{ matrix.perf-test }}/perf + cp -r pprof-data/* /tmp/${{ matrix.perf-test }}/pprof-data + cp -r * /tmp/${{ matrix.perf-test }}/perf - name: Generate performance report - if: always() + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} run: | - # Change to the kube-burner metrics directory - cd kube-burner/examples/workloads/kubelet-density-cni - - # Generate the performance report (without posting comment) - python3 ../../../../contrib/perf/generate_perf_report.py \ - --metrics-dir metrics/ \ - --output performance_report.md \ - --title "OVN-Kubernetes Performance Test Results - Run ${{ github.run_id }}" + cd contrib/perf + # Generate the performance report + python3 generate_perf_report.py \ + --workload ${{ matrix.perf-test }} \ + --metrics-dir /tmp/${{ matrix.perf-test }}/perf/metrics/ \ + --output /tmp/${{ matrix.perf-test }}/performance_report.md \ + --title "OVN-Kubernetes Performance Test Results - Run ${{ github.run_id }}" \ + --pr-number ${{ github.event_name == 'issue_comment' && github.event.issue.number || github.event_name == 'pull_request' && github.event.pull_request.number || github.event_name == 'workflow_dispatch' && '' }} \ + --github-comment echo "Performance report generated successfully" - cat performance_report.md + cat /tmp/${{ matrix.perf-test }}/performance_report.md - - name: Post performance report as PR comment - id: post_comment - if: always() && (github.event_name == 'pull_request' || github.event_name == 'issue_comment') - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const path = 'kube-burner/examples/workloads/kubelet-density-cni/performance_report.md'; - - if (fs.existsSync(path)) { - const report = fs.readFileSync(path, 'utf8'); - - // Get the issue/PR number based on event type - const issueNumber = context.eventName === 'pull_request' ? 
- context.payload.pull_request.number : - context.payload.issue.number; - - try { - await github.rest.issues.createComment({ - issue_number: issueNumber, - owner: context.repo.owner, - repo: context.repo.repo, - body: report - }); - console.log('Performance report posted as PR comment'); - return { success: true }; - } catch (error) { - console.error('Failed to post PR comment:', error.message); - return { success: false }; - } - } else { - console.log('Performance report file not found'); - return { success: false }; - } - - - name: Upload performance report as fallback - if: always() && (steps.post_comment.outcome == 'failure' || steps.post_comment.outputs.result == '{"success":false}' || (github.event_name != 'pull_request' && github.event_name != 'issue_comment')) + - name: Upload performance report + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: performance-report-${{ github.run_id }} - path: kube-burner/examples/workloads/kubelet-density-cni/performance_report.md + name: ${{ matrix.perf-test }}-performance-report-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/performance_report.md - name: Upload pprof data - if: always() + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: pprof-${{ github.run_id }} - path: /tmp/pprof-data - + name: ${{ matrix.perf-test }}-pprof-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/pprof-data - - name: Upload kube-burner data - if: always() + - name: Upload performance test data + if: ${{ matrix.perf-test == 'kubelet-density-cni' || matrix.perf-test == 'udn-density-l2-noPods' || matrix.perf-test == 'cudn-density-l2-noPods' || matrix.perf-test == 'udn-density-l2-pods' }} uses: actions/upload-artifact@v4 with: - name: kube-burner-performance-job-${{ github.run_id }} - path: /tmp/kube-burner - - - name: Runner Diagnostics - if: always() - uses: ./.github/actions/diagnostics + name: ${{ matrix.perf-test }}-performance-test-data-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }} - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + mkdir -p /tmp/${{ matrix.perf-test }}/logs + kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/${{ matrix.perf-test }}/logs - name: Upload kind logs if: always() uses: actions/upload-artifact@v4 with: - name: kind-logs-performance-job-${{ github.run_id }} - path: /tmp/kind/logs - + name: ${{ matrix.perf-test }}-kind-logs-${{ github.run_id }} + path: /tmp/${{ matrix.perf-test }}/logs build-pr: name: Build-PR @@ -430,7 +387,11 @@ jobs: if: | (github.event_name != 'issue_comment') || (github.event.issue.pull_request && - contains(github.event.comment.body, '/perf-test node-density-cni')) || + contains(github.event.comment.body, '/perf-test kubelet-density-cni')) || + (github.event.issue.pull_request && + contains(github.event.comment.body, '/perf-test udn-density-l2-noPods')) || + (github.event.issue.pull_request && + contains(github.event.comment.body, '/perf-test cudn-density-l2-noPods')) || (github.event_name == 'workflow_dispatch') steps: - name: Restore PR image cache diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 31e3bfa6dd..4955bfffc2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -370,8 +370,7 @@ jobs: - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + ./contrib/export-kind-logs.sh set -x docker ps -a docker exec ovn-control-plane crictl images @@ -417,9 +416,7 @@ jobs: - name: Export kind logs if: always() - run: | - mkdir -p /tmp/kind/logs-kind-pr-branch - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs-kind-pr-branch + run: ./contrib/export-kind-logs.sh /tmp/kind/logs-kind-pr-branch - name: Upload kind logs if: always() @@ -457,6 +454,8 @@ jobs: - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default"} - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default"} + - {"target": "shard-conformance", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "no-overlay": "true"} + - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "no-overlay": "true"} - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "shard-conformance", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} - {"target": "control-plane", "ha": "HA", "gateway-mode": "shared", "ipfamily": "ipv6", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-disabled"} @@ -488,6 +487,8 @@ jobs: - {"target": "bgp", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "dns-name-resolver": "enable-dns-name-resolver"} - {"target": "bgp", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "dns-name-resolver": "enable-dns-name-resolver"} - {"target": "bgp", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv6", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation"} + - {"target": "bgp-no-overlay-helm", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", 
"second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "no-overlay": "true"} + - {"target": "bgp-no-overlay", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "no-overlay": "true"} - {"target": "bgp-loose-isolation", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "snatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "routeadvertisements": "advertise-default", "network-segmentation": "enable-network-segmentation", "advertised-udn-isolation-mode": "loose"} - {"target": "traffic-flow-test-only","ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "noSnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "traffic-flow-tests": "1-24", "network-segmentation": "enable-network-segmentation"} - {"target": "tools", "ha": "noHA", "gateway-mode": "local", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "network-segmentation": "enable-network-segmentation"} @@ -496,8 +497,8 @@ jobs: env: JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}-${{ matrix.ic }}" OVN_HYBRID_OVERLAY_ENABLE: ${{ (matrix.target == 'control-plane' || matrix.target == 'control-plane-helm') && (matrix.ipfamily == 'ipv4' || matrix.ipfamily == 'dualstack' ) }} - OVN_MULTICAST_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') || matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" - OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || matrix.target == 'bgp' || matrix.target == 'bgp-loose-isolation' }}" + OVN_MULTICAST_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') || startsWith(matrix.target, 'bgp') }}" + OVN_EMPTY_LB_EVENTS: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'bgp') }}" OVN_HA: "${{ matrix.ha == 'HA' }}" OVN_DISABLE_SNAT_MULTIPLE_GWS: "${{ matrix.disable-snat-multiple-gws == 'noSnatGW' }}" KIND_INSTALL_METALLB: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' || startsWith(matrix.target, 'network-segmentation') }}" @@ -514,7 +515,7 @@ jobs: KIND_NUM_WORKER: "${{ matrix.num-workers }}" KIND_NUM_NODES_PER_ZONE: "${{ matrix.num-nodes-per-zone }}" OVN_DISABLE_FORWARDING: "${{ matrix.forwarding == 'disable-forwarding' }}" - USE_HELM: "${{ matrix.target == 'control-plane-helm' || matrix.target == 'multi-homing-helm' }}" + USE_HELM: "${{ matrix.target == 'control-plane-helm' || matrix.target == 'multi-homing-helm' || matrix.target == 'bgp-no-overlay-helm' }}" OVN_ENABLE_DNSNAMERESOLVER: "${{ matrix.dns-name-resolver == 'enable-dns-name-resolver' }}" OVN_NETWORK_QOS_ENABLE: "${{ matrix.target == 'control-plane' || matrix.target == 'control-plane-helm' }}" TRAFFIC_FLOW_TESTS: "${{ matrix.traffic-flow-tests }}" @@ -529,6 +530,7 @@ jobs: OVN_UNPRIVILEGED_MODE: "${{ matrix.cni-mode == 'unprivileged' }}" 
MULTI_POD_SUBNET: true DYNAMIC_UDN_ALLOCATION: "${{ matrix.target == 'network-segmentation-dynamic' }}" + ENABLE_NO_OVERLAY: "${{ matrix.no-overlay == 'true' }}" steps: - name: Check out code into the Go module directory uses: actions/checkout@v4 @@ -656,7 +658,7 @@ jobs: # set 3 hours for control-plane tests as these might take a while # give 10m extra to give ginkgo chance to timeout before github so that we # get its output - timeout-minutes: ${{ matrix.target == 'bgp-loose-isolation' && 190 || matrix.target == 'bgp' && 190 || matrix.target == 'control-plane' && 190 || matrix.target == 'control-plane-helm' && 190 || matrix.target == 'external-gateway' && 190 || startsWith(matrix.target, 'network-segmentation') && 190 || 130 }} + timeout-minutes: ${{ startsWith(matrix.target, 'bgp') && 190 || matrix.target == 'control-plane' && 190 || matrix.target == 'control-plane-helm' && 190 || matrix.target == 'external-gateway' && 190 || startsWith(matrix.target, 'network-segmentation') && 190 || 130 }} run: | # used by e2e diagnostics package export OVN_IMAGE="ovn-daemonset-fedora:pr" @@ -681,7 +683,7 @@ jobs: elif [[ "${{ matrix.target }}" == network-segmentation* ]]; then make -C test control-plane WHAT="Network Segmentation" make -C test control-plane WHAT="ClusterNetworkConnect" - elif [ "${{ matrix.target }}" == "bgp" ] || [ "${{ matrix.target }}" == "bgp-loose-isolation" ]; then + elif [[ "${{ matrix.target }}" == bgp* ]]; then make -C test control-plane elif [ "${{ matrix.target }}" == "serial" ]; then # Run only Serial tests with ginkgo focus @@ -712,8 +714,7 @@ jobs: - name: Export kind logs if: always() run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + ./contrib/export-kind-logs.sh if [ -n "${TRAFFIC_FLOW_TESTS}" ]; then mv -v /tmp/{,kind/logs/}traffic_flow_test_result.json ||: fi @@ -825,9 +826,7 @@ jobs: - name: Export kind logs if: always() - run: | - mkdir -p /tmp/kind/logs - kind export logs --name ${KIND_CLUSTER_NAME} --verbosity 4 /tmp/kind/logs + run: ./contrib/export-kind-logs.sh - name: Upload kind logs if: always() diff --git a/contrib/export-kind-logs.sh b/contrib/export-kind-logs.sh new file mode 100755 index 0000000000..4db2230ee6 --- /dev/null +++ b/contrib/export-kind-logs.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Export kind cluster logs and collect coredump binaries +# Usage: ./export-kind-logs.sh [logs_dir] +# Default logs_dir: /tmp/kind/logs + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/kind-common.sh" + +# Don't create cluster or delete kubeconfig - we're just exporting logs +KIND_CREATE=false +set_common_default_params + +export_logs "$@" diff --git a/contrib/kind-common b/contrib/kind-common.sh similarity index 67% rename from contrib/kind-common rename to contrib/kind-common.sh index c030826c08..12ed53d7c2 100644 --- a/contrib/kind-common +++ b/contrib/kind-common.sh @@ -14,6 +14,9 @@ case $(uname -m) in aarch64) ARCH="arm64" ;; esac +# Directory for coredump collection (used by setup_coredumps and collect_coredump_binaries) +readonly COREDUMP_DIR="/tmp/kind/logs/coredumps" + if_error_exit() { ########################################################################### # Description: # @@ -33,12 +36,205 @@ if_error_exit() { } set_common_default_params() { + # KIND/cluster params + KIND_CREATE=${KIND_CREATE:-true} KIND_IMAGE=${KIND_IMAGE:-kindest/node} + KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} K8S_VERSION=${K8S_VERSION:-v1.34.0} 
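The wrapper introduced above replaces the repeated mkdir/kind-export pairs in the workflows. Both call shapes it supports:

    ./contrib/export-kind-logs.sh                                # default: /tmp/kind/logs
    ./contrib/export-kind-logs.sh /tmp/kind/logs-kind-pr-branch  # explicit directory

Presetting KIND_CREATE=false before calling set_common_default_params matters: as the defaults below show, the kubeconfig at ${KUBECONFIG} is only scrubbed when KIND_CREATE is true.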
KIND_SETTLE_DURATION=${KIND_SETTLE_DURATION:-30} + KIND_CONFIG=${KIND_CONFIG:-${DIR}/kind.yaml.j2} + KIND_LOCAL_REGISTRY=${KIND_LOCAL_REGISTRY:-false} + KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} + KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} + KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} + KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} + KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} + OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} + # Setup KUBECONFIG patch based on cluster-name + export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} + # Scrub any existing kubeconfigs at the path + if [ "${KIND_CREATE}" == true ]; then + rm -f "${KUBECONFIG}" + fi + + # Image/source code params + OVN_IMAGE=${OVN_IMAGE:-local} + OVN_REPO=${OVN_REPO:-""} + OVN_GITREF=${OVN_GITREF:-""} + + # Subnet params + # Input not currently validated. Modify outside script at your own risk. + # These are the same values defaulted to in KIND code (kind/default.go). + # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host + # so it needs to use a larger subnet + # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 + MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} + MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} + NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} + NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} + MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} + if [ "$MULTI_POD_SUBNET" == true ]; then + NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" + NET_CIDR_IPV6="fd00:10:243::/63/64,fd00:10:244::/48" + fi + NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} + SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} + SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} + JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} + JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} + TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} + TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} + METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} + METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} + PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} + PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} + + # Feature params + OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} + OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} + OVN_HA=${OVN_HA:-false} + ADVERTISE_DEFAULT_NETWORK=${ADVERTISE_DEFAULT_NETWORK:-false} + ADVERTISED_UDN_ISOLATION_MODE=${ADVERTISED_UDN_ISOLATION_MODE:-strict} + BGP_SERVER_NET_SUBNET_IPV4=${BGP_SERVER_NET_SUBNET_IPV4:-172.26.0.0/16} + BGP_SERVER_NET_SUBNET_IPV6=${BGP_SERVER_NET_SUBNET_IPV6:-fc00:f853:ccd:e796::/64} + OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} + OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} + OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} + OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} + ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} + METRICS_IP=${METRICS_IP:-""} + OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} + if [ "$OVN_COMPACT_MODE" == true ]; then + KIND_NUM_WORKER=0 + fi + + KIND_NUM_MASTER=1 + if [ "$OVN_HA" == true ]; then + KIND_NUM_MASTER=3 + KIND_NUM_WORKER=${KIND_NUM_WORKER:-0} + else + KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} + fi + + OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} + if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then + echo "Compact mode cannot be used together with Interconnect" + exit 1 + fi + if [ 
"$OVN_ENABLE_INTERCONNECT" == true ]; then + KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} + + TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) + if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then + echo "(Total k8s nodes / number of nodes per zone) should be zero" + exit 1 + fi + else + KIND_NUM_NODES_PER_ZONE=0 + fi + + ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} + ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} + if [ "$ENABLE_NETWORK_SEGMENTATION" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then + echo "Network segmentation (UDN) requires multi-network to be enabled (-mne)" + exit 1 + fi + + ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} + if [[ $ENABLE_NETWORK_CONNECT == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then + echo "Network connect requires network-segmentation to be enabled (-nse)" + exit 1 + fi + + DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false} + if [[ $DYNAMIC_UDN_ALLOCATION == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then + echo "Dynamic UDN allocation requires network-segmentation to be enabled (-nse)" + exit 1 + fi + DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-120s} + + ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} + if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then + echo "Preconfigured UDN addresses requires network-segmentation to be enabled (-nse)" + exit 1 + fi + if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $OVN_ENABLE_INTERCONNECT != true ]]; then + echo "Preconfigured UDN addresses requires interconnect to be enabled (-ic)" + exit 1 + fi + + ENABLE_ROUTE_ADVERTISEMENTS=${ENABLE_ROUTE_ADVERTISEMENTS:-false} + if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then + echo "Route advertisements requires multi-network to be enabled (-mne)" + exit 1 + fi + if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != true ]; then + echo "Route advertisements requires interconnect to be enabled (-ic)" + exit 1 + fi + + ENABLE_EVPN=${ENABLE_EVPN:-false} + if [ "$ENABLE_EVPN" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then + echo "EVPN requires Route advertisements to be enabled (-rae)" + exit 1 + fi + + ENABLE_NO_OVERLAY=${ENABLE_NO_OVERLAY:-false} + if [ "$ENABLE_NO_OVERLAY" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then + echo "No-overlay mode requires route advertisement to be enabled (-rae)" + exit 1 + fi + if [ "$ENABLE_NO_OVERLAY" == true ] && [ "$ADVERTISE_DEFAULT_NETWORK" != true ]; then + echo "No-overlay mode requires advertise the default network (-adv)" + exit 1 + fi + + if [ "$ENABLE_NO_OVERLAY" == true ]; then + # Set default MTU for no-overlay mode (1500) if not already set + OVN_MTU=${OVN_MTU:-1500} + else + # Set default MTU for overlay mode (1400) if not already set + OVN_MTU=${OVN_MTU:-1400} + fi +} + +set_ovn_image() { + if [ "${KIND_LOCAL_REGISTRY:-false}" == true ]; then + OVN_IMAGE="localhost:5000/ovn-daemonset-fedora:latest" + else + OVN_IMAGE="localhost/ovn-daemonset-fedora:dev" + fi +} + +build_ovn_image() { + local push_args="" + if [ "$OCI_BIN" == "podman" ]; then + # docker doesn't perform tls check by default only podman does, hence we need to disable it for podman. 
+ push_args="--tls-verify=false" + fi + + if [ "$OVN_IMAGE" == local ]; then + set_ovn_image + + # Build image + make -C ${DIR}/../dist/images IMAGE="${OVN_IMAGE}" OVN_REPO="${OVN_REPO}" OVN_GITREF="${OVN_GITREF}" OCI_BIN="${OCI_BIN}" fedora-image + + # store in local registry + if [ "$KIND_LOCAL_REGISTRY" == true ];then + echo "Pushing built image to local $OCI_BIN registry" + $OCI_BIN push $push_args "$OVN_IMAGE" + fi + # We should push to local registry if image is not remote + elif [[ -n "${OVN_IMAGE}" && "${KIND_LOCAL_REGISTRY}" == true && "${OVN_IMAGE}" != */* ]]; then + local local_registry_ovn_image="localhost:5000/${OVN_IMAGE}" + $OCI_BIN tag "$OVN_IMAGE" $local_registry_ovn_image + OVN_IMAGE=$local_registry_ovn_image + $OCI_BIN push $push_args "$OVN_IMAGE" + fi } run_kubectl() { + kind export kubeconfig --name ${KIND_CLUSTER_NAME} local retries=0 local attempts=10 while true; do @@ -542,6 +738,71 @@ build_dnsnameresolver_images() { build_image /tmp/coredns-ocp-dnsnameresolver/operator ${DNSNAMERESOLVER_OPERATOR} Dockerfile } +check_common_dependencies() { + if ! command_exists curl ; then + echo "Dependency not met: Command not found 'curl'" + exit 1 + fi + + if ! command_exists kubectl ; then + echo "'kubectl' not found, installing" + setup_kubectl_bin + fi + + if ! command_exists kind ; then + echo "Dependency not met: Command not found 'kind'" + exit 1 + fi + + local kind_min="0.27.0" + local kind_cur + kind_cur=$(kind version -q) + if [ "$(echo -e "$kind_min\n$kind_cur" | sort -V | head -1)" != "$kind_min" ]; then + echo "Dependency not met: expected kind version >= $kind_min but have $kind_cur" + exit 1 + fi + + if ! command_exists jq ; then + echo "Dependency not met: Command not found 'jq'" + exit 1 + fi + + if ! command_exists awk ; then + echo "Dependency not met: Command not found 'awk'" + exit 1 + fi + + if ! command_exists jinjanate ; then + if ! command_exists pipx ; then + echo "Dependency not met: 'jinjanator' not installed and cannot install with 'pipx'" + exit 1 + fi + echo "'jinjanate' not found, installing with 'pipx'" + install_jinjanator_renderer + fi + + if ! command_exists docker && ! command_exists podman; then + echo "Dependency not met: Neither docker nor podman found" + exit 1 + fi + + if command_exists podman && ! command_exists skopeo; then + echo "Dependency not met: skopeo not installed. Run the following command to install it: 'sudo dnf install skopeo'" + exit 1 + fi +} + +install_jinjanator_renderer() { + # ensure jinjanator renderer installed + pipx install jinjanator[yaml] + pipx ensurepath --force >/dev/null + export PATH=~/.local/bin:$PATH +} + +install_ovn_image() { + install_image "${OVN_IMAGE}" +} + # install_image accepts the image name along with the tag as an argument and installs it. install_image() { # If local registry is being used push image there for consumption by kind cluster @@ -822,7 +1083,7 @@ destroy_bgp() { fi } -install_ffr_k8s() { +install_frr_k8s() { echo "Installing frr-k8s ..." 
clone_frr @@ -841,8 +1102,8 @@ install_ffr_k8s() { if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then # Find all line numbers where the IPv4 prefix is defined IPv6_LINE=" - prefix: ${BGP_SERVER_NET_SUBNET_IPV6}" - # Process each occurrence of the IPv4 prefix - for LINE_NUM in $(grep -n "prefix: ${BGP_SERVER_NET_SUBNET_IPV4}" receive_filtered.yaml | cut -d ':' -f 1); do + # Process each occurrence of the IPv4 prefix in reverse order to avoid line number shifting + for LINE_NUM in $(grep -n "prefix: ${BGP_SERVER_NET_SUBNET_IPV4}" receive_filtered.yaml | cut -d ':' -f 1 | sort -rn); do # Insert the IPv6 prefix after each IPv4 prefix line sed -i "${LINE_NUM}a\\${IPv6_LINE}" receive_filtered.yaml done @@ -923,18 +1184,18 @@ interconnect_arg_check() { setup_coredumps() { # Setup core dump collection # - # Core dumps will be saved on the HOST at /tmp/kind/logs/coredumps (not inside containers) + # Core dumps will be saved on the HOST at $COREDUMP_DIR (not inside containers) # because kernel.core_pattern is a kernel-level setting shared across all containers. # # - Using a pipe instead of a file path avoids needing to mount - # /tmp/kind/logs/coredumps into every container that might crash - # - The pipe executes in the host's namespace, so /tmp/kind/logs/coredumps + # $COREDUMP_DIR into every container that might crash + # - The pipe executes in the host's namespace, so $COREDUMP_DIR # automatically refers to the host path # - # Location: /tmp/kind/logs is used to ensure coredumps are exported in CI + # Location: COREDUMP_DIR is under /tmp/kind/logs to ensure coredumps are exported in CI # Use container exec to avoid asking for root permissions - mkdir -p "/tmp/kind/logs/coredumps" + mkdir -p "$COREDUMP_DIR" ulimit -c unlimited for node in $(kind get nodes --name "${KIND_CLUSTER_NAME}"); do # Core dump filename pattern variables: @@ -942,6 +1203,257 @@ setup_coredumps() { # %e - executable filename # %h - hostname (container hostname) # %s - signal number that caused dump - ${OCI_BIN} exec "$node" sysctl -w kernel.core_pattern="|/bin/dd of=/tmp/kind/logs/coredumps/core.%P.%e.%h.%s bs=1M status=none" + ${OCI_BIN} exec "$node" sysctl -w kernel.core_pattern="|/bin/dd of=${COREDUMP_DIR}/core.%P.%e.%h.%s bs=1M status=none" + done +} + +wait_for_coredumps() { + # Wait for any in-progress coredump writes to complete + # The kernel pipes coredumps to dd processes, which can take 30+ seconds for large Go binaries + # + # Challenge: Go's crash handling (printing stack traces for all goroutines) takes + # several seconds BEFORE it calls abort() and the kernel starts the coredump. + # So we can't just check for dd processes - we need to wait for potential crashes + # to fully materialize. + + local max_wait=120 # Maximum wait time in seconds + local initial_wait=15 # Initial wait for Go crash handling to complete + local waited=0 + + if [ ! -d "$COREDUMP_DIR" ]; then + return 0 + fi + + # Record initial coredump count + local initial_count + initial_count=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null | wc -l || echo 0) + echo "Checking for in-progress coredump writes (initial count: $initial_count)..." + + # Initial wait: Go's crash handling (printing goroutine stack traces) can take + # 10+ seconds before abort() is called and the kernel starts the coredump + echo "Waiting ${initial_wait}s for any pending crash handling to complete..." 
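To make the waiting logic here concrete: with the pipe pattern installed by setup_coredumps, the kernel expands the %-fields itself and feeds the core image to a host-side dd, which is exactly the process pgrep looks for below. For a hypothetical crash of PID 1234 of ovnkube on node ovn-worker with SIGABRT (6):

    # what the kernel effectively runs for that crash:
    /bin/dd of=/tmp/kind/logs/coredumps/core.1234.ovnkube.ovn-worker.6 bs=1M status=none
    # collect_coredump_binaries later recovers the executable name from field 3:
    echo "core.1234.ovnkube.ovn-worker.6" | cut -d. -f3   # -> ovnkube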
+ sleep "$initial_wait" + waited=$initial_wait + + while [ $waited -lt $max_wait ]; do + # Check for dd processes writing to the coredump directory + local dd_procs + dd_procs=$(pgrep -f "dd of=${COREDUMP_DIR}" 2>/dev/null || true) + + # Check current coredump count + local current_count + current_count=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null | wc -l || echo 0) + + if [ -z "$dd_procs" ]; then + # No dd processes running + if [ "$current_count" -gt "$initial_count" ]; then + echo "New coredumps detected (initial: $initial_count, current: $current_count) after ${waited}s" + fi + echo "No coredump writes in progress after ${waited}s" + return 0 + fi + + echo "Waiting for coredump writes... (${waited}s, dd PIDs: $dd_procs, coredumps: $current_count)" + sleep 5 + waited=$((waited + 5)) + done + + echo "Warning: Timed out waiting for coredump writes after ${max_wait}s" +} + +export_logs() { + # Export kind logs and collect coredump binaries + # Usage: export_logs [logs_dir] + # Default logs_dir: /tmp/kind/logs + + local logs_dir="${1:-/tmp/kind/logs}" + + mkdir -p "$logs_dir" + + # Wait for any in-progress coredump writes to complete before exporting + wait_for_coredumps + + kind export logs --name "${KIND_CLUSTER_NAME}" --verbosity 4 "$logs_dir" + collect_coredump_binaries +} + +# Helper function to try extracting a binary from a container +# Used by collect_coredump_binaries() +try_extract_binary() { + local node=$1 + local container_id=$2 + local exe=$3 + local binary_dir=$4 + + # Get container's PID to access its rootfs via /proc//root + local pid + pid=$(${OCI_BIN} exec "$node" crictl inspect "$container_id" 2>/dev/null | jq -r '.info.pid // empty') + if [ -z "$pid" ] || [ "$pid" = "null" ] || [ "$pid" = "0" ]; then + return 1 + fi + + # Common paths where binaries might be located + local binary_paths=("/usr/bin" "/bin" "/usr/sbin" "/sbin" "/usr/libexec/cni" "/usr/lib/frr") + + for path in "${binary_paths[@]}"; do + local full_path="/proc/${pid}/root${path}/${exe}" + if ${OCI_BIN} exec "$node" test -f "$full_path" 2>/dev/null; then + if ${OCI_BIN} exec "$node" cat "$full_path" > "${binary_dir}/${exe}" 2>/dev/null && [ -s "${binary_dir}/${exe}" ]; then + echo " Collected binary: ${exe} from container $container_id (pid $pid)" + return 0 + fi + fi + done + rm -f "${binary_dir}/${exe}" 2>/dev/null + return 1 +} + +collect_coredump_binaries() { + # Collect binaries that caused coredumps for post-mortem debugging + # Parses coredump filenames (core.%P.%e.%h.%s) to identify executables + # Binaries run inside pod containers, so we use crictl to access them + + local binary_dir="${COREDUMP_DIR}/binaries" + + if [ ! -d "$COREDUMP_DIR" ]; then + echo "No coredump directory found, skipping binary collection" + return 0 + fi + + local coredumps + coredumps=$(find "$COREDUMP_DIR" -maxdepth 1 -name "core.*" -type f 2>/dev/null) + if [ -z "$coredumps" ]; then + echo "No coredumps found, skipping binary collection" + return 0 + fi + + mkdir -p "$binary_dir" + + # Get all KIND nodes + local nodes + nodes=$(kind get nodes --name "${KIND_CLUSTER_NAME}" 2>/dev/null) + if [ -z "$nodes" ]; then + echo "Warning: No KIND nodes available, cannot collect binaries" + return 0 + fi + + # Process each coredump: extract exe name (%e, field 3) + # Filename format: core.%P.%e.%h.%s (see setup_coredumps) + for coredump in $coredumps; do + local filename + filename=$(basename "$coredump") + local exe + exe=$(echo "$filename" | cut -d. 
-f3) + + echo "Processing coredump: $filename (exe=$exe)" + + # Skip if we already collected this binary + if [ -f "${binary_dir}/${exe}" ]; then + echo " Binary $exe already collected, skipping" + continue + fi + + local found=false + + # Search all containers on all nodes for the binary + for node in $nodes; do + local containers + containers=$(${OCI_BIN} exec "$node" crictl ps -q 2>/dev/null) || true + for container_id in $containers; do + if try_extract_binary "$node" "$container_id" "$exe" "$binary_dir"; then + echo " Collected $exe from container $container_id on node $node" + found=true + break 2 + fi + done + done + + # Fallback: binary running directly on KIND node (not in container) + if [ "$found" = false ]; then + for node in $nodes; do + local bin_path + bin_path=$(${OCI_BIN} exec "$node" which "$exe" 2>/dev/null) || true + if [ -n "$bin_path" ]; then + echo " Collected $exe from node $node at $bin_path" + ${OCI_BIN} cp "${node}:${bin_path}" "${binary_dir}/${exe}" && found=true || true + break + fi + done + fi + + if [ "$found" = false ]; then + echo " WARNING: Could not find binary '$exe'" + fi + done + + echo "Binary collection complete:" + ls -la "$binary_dir" 2>/dev/null || true +} + +# Some environments (Fedora32,31 on desktop), have problems when the cluster +# is deleted directly with kind `kind delete cluster --name ovn`, it restarts the host. +# The root cause is unknown, this also can not be reproduced in Ubuntu 20.04 or +# with Fedora32 Cloud, but it does not happen if we clean first the ovn-kubernetes resources. +delete() { + OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} + + if [ "$KIND_INSTALL_METALLB" == true ]; then + destroy_metallb + fi + if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then + destroy_bgp + fi + timeout 5 kubectl --kubeconfig "${KUBECONFIG}" delete namespace ovn-kubernetes || true + sleep 5 + kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}" +} + +create_kind_cluster() { + # Output of the jinjanate command + KIND_CONFIG_LCL=${DIR}/kind-${KIND_CLUSTER_NAME}.yaml + + ovn_ip_family=${IP_FAMILY} \ + ovn_ha=${OVN_HA} \ + net_cidr="${KIND_CIDR}" \ + svc_cidr=${SVC_CIDR} \ + use_local_registry=${KIND_LOCAL_REGISTRY} \ + dns_domain=${KIND_DNS_DOMAIN} \ + ovn_num_master=${KIND_NUM_MASTER} \ + ovn_num_worker=${KIND_NUM_WORKER} \ + kind_num_infra=${KIND_NUM_INFRA} \ + cluster_log_level=${KIND_CLUSTER_LOGLEVEL:-4} \ + kind_local_registry_port=${KIND_LOCAL_REGISTRY_PORT} \ + kind_local_registry_name=${KIND_LOCAL_REGISTRY_NAME} \ + jinjanate "${KIND_CONFIG}" -o "${KIND_CONFIG_LCL}" + + # Create KIND cluster. For additional debug, add '--verbosity ': 0 None .. 
3 Debug + if kind get clusters | grep "${KIND_CLUSTER_NAME}"; then + delete + fi + + if [[ "${KIND_LOCAL_REGISTRY}" == true ]]; then + create_local_registry + fi + + kind create cluster --name "${KIND_CLUSTER_NAME}" --kubeconfig "${KUBECONFIG}" --image "${KIND_IMAGE}":"${K8S_VERSION}" --config=${KIND_CONFIG_LCL} --retain + + cat "${KUBECONFIG}" +} + +remove_no_schedule_taint() { + KIND_NODES=$(kind_get_nodes | sort) + for n in $KIND_NODES; do + # do not error if it fails to remove the taint + kubectl taint node "$n" node-role.kubernetes.io/control-plane:NoSchedule- || true + done +} + +label_ovn_ha() { + MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") + # We want OVN HA not Kubernetes HA + # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= + # to choose the nodes where ovn master components will be placed + for n in $MASTER_NODES; do + kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite done } diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index d8d2ba75cc..85764d6d91 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -5,94 +5,21 @@ set -eo pipefail # Returns the full directory name of the script export DIR="$( cd -- "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -export OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} - -# Source the kind-common file from the same directory where this script is located -source "${DIR}/kind-common" +# Source the kind-common.sh file from the same directory where this script is located +source "${DIR}/kind-common.sh" set_default_params() { set_common_default_params - # Set default values - export KIND_CONFIG=${KIND_CONFIG:-} - export KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} - export KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} - export KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} - export KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} - export OVN_HA=${OVN_HA:-false} - export OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} - export OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} - export OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} - export OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} - export KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} - export ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} - export ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} - export ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} - export ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} - export OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} - export KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} - export KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} - export OVN_IMAGE=${OVN_IMAGE:-'ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu:helm'} - - # Setup KUBECONFIG patch based on cluster-name - export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} - - # Validated params that work - export MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} - export MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} - - # Input not currently validated. Modify outside script at your own risk. - # These are the same values defaulted to in KIND code (kind/default.go). 
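For reference, the jinjanate-driven flow in create_kind_cluster above can be exercised standalone; jinjanator renders from environment variables when no data file is given, which is why the call site passes everything as per-command env vars. A trimmed, hypothetical render:

    # assumes `pipx install "jinjanator[yaml]"` put jinjanate on PATH
    ovn_ha=false ovn_num_master=1 ovn_num_worker=2 \
    net_cidr=10.244.0.0/16 svc_cidr=10.96.0.0/16 \
      jinjanate contrib/kind.yaml.j2 -o /tmp/kind-ovn.yaml
    kind create cluster --name ovn --config /tmp/kind-ovn.yaml --retain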
- # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host - # so it needs to use a larger subnet - # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 - export NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} - if [ "$MULTI_POD_SUBNET" == true ]; then - NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" - fi - export NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} - export SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} - export NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} - export SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} - export JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} - export JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} - export TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} - export TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} - export METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} - export METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} - export DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false} - export DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-} - - export KIND_NUM_MASTER=1 - if [ "$OVN_HA" == true ]; then - KIND_NUM_MASTER=3 - fi - - OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} - if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then - echo "Compact mode cannot be used together with Interconnect" - exit 1 + # Hard code ipv4 support until IPv6 is implemented + if [ "$PLATFORM_IPV6_SUPPORT" == true ]; then + echo "kind-helm.sh does not support IPv6 yet" + exit 1 fi - - - if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then - KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} - TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) - if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then - echo "(Total k8s nodes / number of nodes per zone) should be zero" - exit 1 - fi - else - KIND_NUM_NODES_PER_ZONE=0 + if [ "$PLATFORM_IPV4_SUPPORT" != true ]; then + echo "kind-helm.sh only supports IPv4, must set PLATFORM_IPV4_SUPPORT to true " + exit 1 fi - - # Hard code ipv4 support until IPv6 is implemented - export PLATFORM_IPV4_SUPPORT=true - - export OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} - export MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} - export ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} - export METRICS_IP=${METRICS_IP:-""} } usage() { @@ -111,14 +38,23 @@ usage() { echo " [ -nse | --network-segmentation-enable ]" echo " [ -nce | --network-connect-enable ]" echo " [ -uae | --preconfigured-udn-addresses-enable ]" + echo " [ -rae | --route-advertisements-enable ]" + echo " [ -evpn | --evpn-enable ]" echo " [-dudn | --dynamic-udn-allocation]" echo " [-dug | --dynamic-udn-removal-grace-period]" + echo " [-adv | --advertise-default-network]" + echo " [-rud | --routed-udn-isolation-disable]" echo " [ -nqe | --network-qos-enable ]" + echo " [ -noe | --no-overlay-enable ]" echo " [ -wk | --num-workers ]" echo " [ -ic | --enable-interconnect]" echo " [ -npz | --node-per-zone ]" + echo " [ -ov | --ovn-image ]" + echo " [ -ovr | --ovn-repo ]" + echo " [ -ovg | --ovn-gitref ]" echo " [ -cn | --cluster-name ]" echo " [ -mip | --metrics-ip ]" + echo " [ -mtu ]" echo " [ --enable-coredumps ]" echo " [ -h ]" echo "" @@ -139,13 +75,22 @@ usage() { echo "-nse | --network-segmentation-enable Enable network segmentation. 
DEFAULT: Disabled" echo "-nce | --network-connect-enable Enable network connect (requires network segmentation). DEFAULT: Disabled" echo "-uae | --preconfigured-udn-addresses-enable Enable connecting workloads with preconfigured network to user-defined networks. DEFAULT: Disabled" + echo "-rae | --route-advertisements-enable Enable route advertisements" + echo "-evpn | --evpn-enable Enable EVPN" echo "-dudn | --dynamic-udn-allocation Enable dynamic UDN allocation. DEFAULT: Disabled" echo "-dug | --dynamic-udn-removal-grace-period Configure the grace period in seconds for dynamic UDN removal. DEFAULT: 120 seconds" + echo "-adv | --advertise-default-network Applies a RouteAdvertisements configuration to advertise the default network on all nodes" + echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict." echo "-nqe | --network-qos-enable Enable network QoS. DEFAULT: Disabled" + echo "-noe | --no-overlay-enable Enable no-overlay mode for the default network. DEFAULT: Disabled" echo "-ha | --ha-enabled Enable high availability. DEFAULT: HA Disabled" echo "-wk | --num-workers Number of worker nodes. DEFAULT: 2 workers" + echo "-ov | --ovn-image Use the specified docker image instead of building locally. DEFAULT: local build." + echo "-ovr | --ovn-repo Specify the repository to build OVN from" + echo "-ovg | --ovn-gitref Specify the branch, tag or commit id to build OVN from, it can be a pattern like 'branch-*' it will order results and use the first one" echo "-cn | --cluster-name Configure the kind cluster's name" echo "-mip | --metrics-ip IP address to bind metrics endpoints. DEFAULT: K8S_NODE_IP or 0.0.0.0" + echo "-mtu Define the overlay mtu. DEFAULT: 1400 (1500 for no-overlay mode)" echo "--enable-coredumps Enable coredump collection on kind nodes. DEFAULT: Disabled" echo "-dns | --enable-dnsnameresolver Enable DNSNameResolver for resolving the DNS names used in the DNS rules of EgressFirewall." 
echo "-ce | --enable-central [DEPRECATED] Deploy with OVN Central (Legacy Architecture)" @@ -195,6 +140,14 @@ parse_args() { ;; -uae | --preconfigured-udn-addresses-enable) ENABLE_PRE_CONF_UDN_ADDR=true ;; + -rae | --route-advertisements-enable) ENABLE_ROUTE_ADVERTISEMENTS=true + ;; + -evpn | --evpn-enable) ENABLE_EVPN=true + ;; + -adv | --advertise-default-network) ADVERTISE_DEFAULT_NETWORK=true + ;; + -rud | --routed-udn-isolation-disable) ADVERTISED_UDN_ISOLATION_MODE=loose + ;; -dudn | --dynamic-udn-allocation) DYNAMIC_UDN_ALLOCATION=true ;; -dug | --dynamic-udn-removal-grace-period) shift @@ -210,6 +163,8 @@ parse_args() { ;; -nqe | --network-qos-enable ) OVN_NETWORK_QOS_ENABLE=true ;; + -noe | --no-overlay-enable ) ENABLE_NO_OVERLAY=true + ;; -ha | --ha-enabled ) OVN_HA=true KIND_NUM_MASTER=3 ;; @@ -221,6 +176,15 @@ parse_args() { fi KIND_NUM_WORKER=$1 ;; + -ov | --ovn-image ) shift + OVN_IMAGE=$1 + ;; + -ovr | --ovn-repo ) shift + OVN_REPO=$1 + ;; + -ovg | --ovn-gitref ) shift + OVN_GITREF=$1 + ;; -cn | --cluster-name ) shift KIND_CLUSTER_NAME=$1 # Setup KUBECONFIG @@ -248,6 +212,9 @@ parse_args() { -mip | --metrics-ip ) shift METRICS_IP="$1" ;; + -mtu ) shift + OVN_MTU=$1 + ;; --enable-coredumps ) ENABLE_COREDUMPS=true ;; * ) usage @@ -267,6 +234,7 @@ print_params() { echo "" echo "KIND_CONFIG_FILE = $KIND_CONFIG" echo "KUBECONFIG = $KUBECONFIG" + echo "OCI_BIN = $OCI_BIN" echo "KIND_INSTALL_INGRESS = $KIND_INSTALL_INGRESS" echo "KIND_INSTALL_METALLB = $KIND_INSTALL_METALLB" echo "KIND_INSTALL_PLUGINS = $KIND_INSTALL_PLUGINS" @@ -282,8 +250,16 @@ print_params() { echo "ENABLE_NETWORK_SEGMENTATION = $ENABLE_NETWORK_SEGMENTATION" echo "ENABLE_NETWORK_CONNECT = $ENABLE_NETWORK_CONNECT" echo "ENABLE_PRE_CONF_UDN_ADDR = $ENABLE_PRE_CONF_UDN_ADDR" + echo "ENABLE_ROUTE_ADVERTISEMENTS = $ENABLE_ROUTE_ADVERTISEMENTS" + echo "ENABLE_EVPN = $ENABLE_EVPN" + echo "ADVERTISE_DEFAULT_NETWORK = $ADVERTISE_DEFAULT_NETWORK" + echo "ADVERTISED_UDN_ISOLATION_MODE = $ADVERTISED_UDN_ISOLATION_MODE" echo "OVN_NETWORK_QOS_ENABLE = $OVN_NETWORK_QOS_ENABLE" + echo "ENABLE_NO_OVERLAY = $ENABLE_NO_OVERLAY" + echo "OVN_MTU = $OVN_MTU" echo "OVN_IMAGE = $OVN_IMAGE" + echo "OVN_REPO = $OVN_REPO" + echo "OVN_GITREF = $OVN_GITREF" echo "KIND_NUM_MASTER = $KIND_NUM_MASTER" echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" @@ -302,23 +278,11 @@ print_params() { } check_dependencies() { - if ! command_exists kubectl ; then - echo "'kubectl' not found, installing" - setup_kubectl_bin - fi - - for cmd in "$OCI_BIN" kind helm go ; do \ - if ! command_exists "$cmd" ; then - echo "Dependency not met: $cmd" - exit 1 - fi - done - - # check for currently unsupported features - if [ "${PLATFORM_IPV6_SUPPORT:-}" = "true" ]; then - echo "Fatal: PLATFORM_IPV6_SUPPORT support not implemented yet" - exit 1 - fi + check_common_dependencies + if ! 
command_exists helm ; then + echo "'helm' not found, exiting" + exit 1 + fi } helm_prereqs() { @@ -328,103 +292,6 @@ helm_prereqs() { sudo sysctl fs.inotify.max_user_instances=512 } -build_ovn_image() { - if [ "${SKIP_OVN_IMAGE_REBUILD}" == "true" ]; then - echo "Explicitly instructed not to rebuild ovn image: ${OVN_IMAGE}" - return - fi - - # Build ovn kube image - pushd ${DIR}/../dist/images - make fedora-image - popd -} - -get_image() { - local image_and_tag="${1:-$OVN_IMAGE}" # Use $1 if provided, otherwise use $OVN_IMAGE - local image="${image_and_tag%%:*}" # Extract everything before the first colon - echo "$image" -} - -get_tag() { - local image_and_tag="${1:-$OVN_IMAGE}" # Use $1 if provided, otherwise use $OVN_IMAGE - local tag="${image_and_tag##*:}" # Extract everything after the last colon - echo "$tag" -} - -create_kind_cluster() { - [ -n "${KIND_CONFIG}" ] || { - KIND_CONFIG='/tmp/kind.yaml' - - # Start of the kind configuration - cat < /tmp/kind.yaml -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: -- role: control-plane - kubeadmConfigPatches: - - | - kind: InitConfiguration - nodeRegistration: - kubeletExtraArgs: - node-labels: "ingress-ready=true" - authorization-mode: "AlwaysAllow" -EOT - } - - # Add control-plane nodes based on OVN_HA status. If there are 2 or more worker nodes, use - # 2 of them them to host databases instead of creating additional control plane nodes. - if [ "$OVN_HA" == true ] && [ "$KIND_NUM_WORKER" -lt 2 ]; then - for i in {2..3}; do # Have 3 control-plane nodes for HA - echo "- role: control-plane" >> /tmp/kind.yaml - done - fi - - # Add worker nodes based on KIND_NUM_WORKER - for i in $(seq 1 $KIND_NUM_WORKER); do - echo "- role: worker" >> /tmp/kind.yaml - done - # kind only allows single subnet for pod network, while ovn-kubernetes supports multiple subnets. - # So we pick the first subnet from the provided list for kind configuration and store it in KIND_CIDR. - # remove host subnet mask info for kind configuration (when the subnet is set as 10.0.0.0/16/14) - KIND_CIDR_IPV4=$(echo "${NET_CIDR_IPV4}"| cut -d',' -f1 | cut -d'/' -f1,2 ) - - # Add networking configuration - cat <> /tmp/kind.yaml -networking: - disableDefaultCNI: true - kubeProxyMode: none - podSubnet: $KIND_CIDR_IPV4 - serviceSubnet: $SVC_CIDR_IPV4 -EOT - - kind delete clusters $KIND_CLUSTER_NAME ||: - kind create cluster --name $KIND_CLUSTER_NAME --image "${KIND_IMAGE}":"${K8S_VERSION}" --config "${KIND_CONFIG}" --retain - kind load docker-image --name $KIND_CLUSTER_NAME $OVN_IMAGE - - # When using HA, label nodes to host db. 
- if [ "$OVN_HA" == true ]; then - kubectl label nodes k8s.ovn.org/ovnkube-db=true --overwrite \ - -l node-role.kubernetes.io/control-plane - if [ "$KIND_NUM_WORKER" -ge 2 ]; then - for n in ovn-worker ovn-worker2; do - # We want OVN HA not Kubernetes HA - # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= - # to choose the nodes where ovn master components will be placed - kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite - done - fi - fi - - # Remove taint, so control-plane nodes can also schedule regular pods - if [ "$KIND_REMOVE_TAINT" == true ]; then - kubectl taint node "$n" node-role.kubernetes.io/master:NoSchedule- \ - -l node-role.kubernetes.io/control-plane ||: - kubectl taint node "$n" node-role.kubernetes.io/control-plane:NoSchedule- \ - -l node-role.kubernetes.io/control-plane ||: - fi -} - label_ovn_single_node_zones() { KIND_NODES=$(kind_get_nodes) for n in $KIND_NODES; do @@ -455,7 +322,6 @@ label_ovn_multiple_nodes_zones() { create_ovn_kubernetes() { cd ${DIR}/../helm/ovn-kubernetes - MASTER_REPLICAS=$(kubectl get node -l node-role.kubernetes.io/control-plane --no-headers | wc -l) if [[ $KIND_NUM_NODES_PER_ZONE == 1 ]]; then label_ovn_single_node_zones value_file="values-single-node-zone.yaml" @@ -465,10 +331,12 @@ create_ovn_kubernetes() { value_file="values-multi-node-zone.yaml" ovnkube_db_options="" else + label_ovn_ha value_file="values-no-ic.yaml" ovnkube_db_options="--set tags.ovnkube-db-raft=$(if [ "${OVN_HA}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set tags.ovnkube-db=$(if [ "${OVN_HA}" == "false" ]; then echo "true"; else echo "false"; fi)" fi + MASTER_REPLICAS=$(kubectl get node -l node-role.kubernetes.io/control-plane --no-headers | wc -l) echo "value_file=${value_file}" # For multi-pod-subnet case, NET_CIDR_IPV4 is a list of CIDRs separated by comma. # When Helm encounters a comma within a string value in a --set argument, it attempts to parse the comma as a separator @@ -480,9 +348,10 @@ helm install ovn-kubernetes . -f "${value_file}" \ --set k8sAPIServer=${API_URL} \ --set podNetwork="${ESCAPED_NET_CIDR_IPV4}" \ --set serviceNetwork=${SVC_CIDR_IPV4} \ + --set mtu=${OVN_MTU} \ --set ovnkube-master.replicas=${MASTER_REPLICAS} \ - --set global.image.repository=$(get_image) \ - --set global.image.tag=$(get_tag) \ + --set global.image.repository=${OVN_IMAGE%%:*} \ + --set global.image.tag=${OVN_IMAGE##*:} \ --set global.enableAdminNetworkPolicy=true \ --set global.enableMulticast=$(if [ "${OVN_MULTICAST_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableMultiNetwork=$(if [ "${ENABLE_MULTI_NET}" == "true" ]; then echo "true"; else echo "false"; fi) \ @@ -491,11 +360,16 @@ helm install ovn-kubernetes . 
-f "${value_file}" \ --set global.enableDynamicUDNAllocation=$(if [ "${DYNAMIC_UDN_ALLOCATION}" == "true" ]; then echo "true"; else echo "false"; fi) \ $( [ -n "$DYNAMIC_UDN_GRACE_PERIOD" ] && echo "--set global.dynamicUDNGracePeriod=$DYNAMIC_UDN_GRACE_PERIOD" ) \ --set global.enablePreconfiguredUDNAddresses=$(if [ "${ENABLE_PRE_CONF_UDN_ADDR}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableRouteAdvertisements=$(if [ "${ENABLE_ROUTE_ADVERTISEMENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableEVPN=$(if [ "${ENABLE_EVPN}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.advertiseDefaultNetwork=$(if [ "${ADVERTISE_DEFAULT_NETWORK}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.advertisedUDNIsolationMode="${ADVERTISED_UDN_ISOLATION_MODE}" \ --set global.enableHybridOverlay=$(if [ "${OVN_HYBRID_OVERLAY_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableObservability=$(if [ "${OVN_OBSERV_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.emptyLbEvents=$(if [ "${OVN_EMPTY_LB_EVENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableDNSNameResolver=$(if [ "${OVN_ENABLE_DNSNAMERESOLVER}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.enableNoOverlay=$(if [ "${ENABLE_NO_OVERLAY}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableCoredumps=$(if [ "${ENABLE_COREDUMPS}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF @@ -504,14 +378,6 @@ EOF eval "${cmd}" } -delete() { - if [ "$KIND_INSTALL_METALLB" == true ]; then - destroy_metallb - fi - helm uninstall ovn-kubernetes && sleep 5 ||: - kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}" -} - install_online_ovn_kubernetes_crds() { # NOTE: When you update vendoring versions for the ANP & BANP APIs, we must update the version of the CRD we pull from in the below URL run_kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/network-policy-api/v0.1.5/config/crd/experimental/policy.networking.k8s.io_adminnetworkpolicies.yaml @@ -529,6 +395,7 @@ if [ "$ENABLE_COREDUMPS" == true ]; then setup_coredumps fi detect_apiserver_url +install_ovn_image docker_disable_ipv6 coredns_patch if [ "$OVN_ENABLE_DNSNAMERESOLVER" == true ]; then @@ -539,6 +406,13 @@ if [ "$OVN_ENABLE_DNSNAMERESOLVER" == true ]; then add_ocp_dnsnameresolver_to_coredns_config update_coredns_deployment_image fi +if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then + deploy_frr_external_container + deploy_bgp_external_server +fi +if [ "$KIND_REMOVE_TAINT" == true ]; then + remove_no_schedule_taint +fi create_ovn_kubernetes install_online_ovn_kubernetes_crds @@ -574,4 +448,8 @@ if [ "$KIND_INSTALL_KUBEVIRT" == true ]; then install_kubevirt fi +if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then + install_frr_k8s +fi + interconnect_arg_check diff --git a/contrib/kind.sh b/contrib/kind.sh index 2d489b9f01..ea8ce6fc7c 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -3,26 +3,8 @@ # Returns the full directory name of the script DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -# Source the kind-common file from the same directory where this script is located -source "${DIR}/kind-common" - -# Some environments (Fedora32,31 on desktop), have problems when the cluster 
-# is deleted directly with kind `kind delete cluster --name ovn`, it restarts the host. -# The root cause is unknown, this also can not be reproduced in Ubuntu 20.04 or -# with Fedora32 Cloud, but it does not happen if we clean first the ovn-kubernetes resources. -delete() { - OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} - - if [ "$KIND_INSTALL_METALLB" == true ]; then - destroy_metallb - fi - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then - destroy_bgp - fi - timeout 5 kubectl --kubeconfig "${KUBECONFIG}" delete namespace ovn-kubernetes || true - sleep 5 - kind delete cluster --name "${KIND_CLUSTER_NAME:-ovn}" -} +# Source the kind-common.sh file from the same directory where this script is located +source "${DIR}/kind-common.sh" usage() { echo "usage: kind.sh [[[-cf |--config-file ] [-kt|--keep-taint] [-ha|--ha-enabled]" @@ -60,6 +42,7 @@ usage() { echo " [-dug | --dynamic-udn-removal-grace-period ]" echo " [-adv | --advertise-default-network]" echo " [-nqe | --network-qos-enable]" + echo " [-noe | --no-overlay-enable]" echo " [--isolated]" echo " [--enable-coredumps]" echo " [-dns | --enable-dnsnameresolver]" @@ -142,6 +125,7 @@ echo "-dug | --dynamic-udn-removal-grace-period Configure the grac echo "-adv | --advertise-default-network Applies a RouteAdvertisements configuration to advertise the default network on all nodes" echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict." echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" +echo "-noe | --no-overlay-enable Enable no overlay" echo "" } @@ -372,6 +356,8 @@ parse_args() { -ic | --enable-interconnect ) OVN_ENABLE_INTERCONNECT=true IC_ARG_PROVIDED=true ;; + -noe | --no-overlay-enable) ENABLE_NO_OVERLAY=true + ;; --disable-ovnkube-identity) OVN_ENABLE_OVNKUBE_IDENTITY=false ;; -mtu ) shift @@ -417,6 +403,7 @@ print_params() { echo "KIND_INSTALL_PLUGINS = $KIND_INSTALL_PLUGINS" echo "KIND_INSTALL_KUBEVIRT = $KIND_INSTALL_KUBEVIRT" echo "KIND_OPT_OUT_KUBEVIRT_IPAM = $KIND_OPT_OUT_KUBEVIRT_IPAM" + echo "OCI_BIN = $OCI_BIN" echo "OVN_HA = $OVN_HA" echo "RUN_IN_CONTAINER = $RUN_IN_CONTAINER" echo "KIND_CLUSTER_NAME = $KIND_CLUSTER_NAME" @@ -477,6 +464,7 @@ print_params() { echo "ENABLE_PRE_CONF_UDN_ADDR = $ENABLE_PRE_CONF_UDN_ADDR" echo "DYNAMIC_UDN_ALLOCATION = $DYNAMIC_UDN_ALLOCATION" echo "DYNAMIC_UDN_GRACE_PERIOD = $DYNAMIC_UDN_GRACE_PERIOD" + echo "ENABLE_NO_OVERLAY = $ENABLE_NO_OVERLAY" echo "OVN_ENABLE_INTERCONNECT = $OVN_ENABLE_INTERCONNECT" if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then echo "KIND_NUM_NODES_PER_ZONE = $KIND_NUM_NODES_PER_ZONE" @@ -496,67 +484,6 @@ print_params() { echo "" } -install_jinjanator_renderer() { - # ensure jinjanator renderer installed - pipx install jinjanator[yaml] - pipx ensurepath --force >/dev/null - export PATH=~/.local/bin:$PATH -} - -check_dependencies() { - if ! command_exists curl ; then - echo "Dependency not met: Command not found 'curl'" - exit 1 - fi - - if ! command_exists kubectl ; then - echo "'kubectl' not found, installing" - setup_kubectl_bin - fi - - if ! command_exists kind ; then - echo "Dependency not met: Command not found 'kind'" - exit 1 - fi - - local kind_min="0.27.0" - local kind_cur - kind_cur=$(kind version -q) - if [ "$(echo -e "$kind_min\n$kind_cur" | sort -V | head -1)" != "$kind_min" ]; then - echo "Dependency not met: expected kind version >= $kind_min but have $kind_cur" - exit 1 - fi - - if ! 
command_exists jq ; then - echo "Dependency not met: Command not found 'jq'" - exit 1 - fi - - if ! command_exists awk ; then - echo "Dependency not met: Command not found 'awk'" - exit 1 - fi - - if ! command_exists jinjanate ; then - if ! command_exists pipx ; then - echo "Dependency not met: 'jinjanator' not installed and cannot install with 'pipx'" - exit 1 - fi - echo "'jinjanate' not found, installing with 'pipx'" - install_jinjanator_renderer - fi - - if ! command_exists docker && ! command_exists podman; then - echo "Dependency not met: Neither docker nor podman found" - exit 1 - fi - - if command_exists podman && ! command_exists skopeo; then - echo "Dependency not met: skopeo not installed. Run the following command to install it: 'sudo dnf install skopeo'" - exit 1 - fi -} - OPENSSL="" set_openssl_binary() { for s in openssl openssl3; do @@ -580,47 +507,24 @@ set_default_params() { # Set default values # Used for multi cluster setups - KIND_CREATE=${KIND_CREATE:-true} KIND_ADD_NODES=${KIND_ADD_NODES:-false} - KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-ovn} - # Setup KUBECONFIG patch based on cluster-name - export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf} - # Scrub any existing kubeconfigs at the path - if [ "${KIND_CREATE}" == true ]; then - rm -f ${KUBECONFIG} - fi MANIFEST_OUTPUT_DIR=${MANIFEST_OUTPUT_DIR:-${DIR}/../dist/yaml} if [ ${KIND_CLUSTER_NAME} != "ovn" ]; then MANIFEST_OUTPUT_DIR="${DIR}/../dist/yaml/${KIND_CLUSTER_NAME}" fi RUN_IN_CONTAINER=${RUN_IN_CONTAINER:-false} OVN_GATEWAY_MODE=${OVN_GATEWAY_MODE:-shared} - KIND_INSTALL_INGRESS=${KIND_INSTALL_INGRESS:-false} - KIND_INSTALL_METALLB=${KIND_INSTALL_METALLB:-false} - KIND_INSTALL_PLUGINS=${KIND_INSTALL_PLUGINS:-false} - KIND_INSTALL_KUBEVIRT=${KIND_INSTALL_KUBEVIRT:-false} KIND_OPT_OUT_KUBEVIRT_IPAM=${KIND_OPT_OUT_KUBEVIRT_IPAM:-false} - OVN_HA=${OVN_HA:-false} - KIND_LOCAL_REGISTRY=${KIND_LOCAL_REGISTRY:-false} KIND_LOCAL_REGISTRY_NAME=${KIND_LOCAL_REGISTRY_NAME:-kind-registry} KIND_LOCAL_REGISTRY_PORT=${KIND_LOCAL_REGISTRY_PORT:-5000} KIND_DNS_DOMAIN=${KIND_DNS_DOMAIN:-"cluster.local"} - KIND_CONFIG=${KIND_CONFIG:-${DIR}/kind.yaml.j2} - KIND_REMOVE_TAINT=${KIND_REMOVE_TAINT:-true} - PLATFORM_IPV4_SUPPORT=${PLATFORM_IPV4_SUPPORT:-true} - PLATFORM_IPV6_SUPPORT=${PLATFORM_IPV6_SUPPORT:-false} ENABLE_IPSEC=${ENABLE_IPSEC:-false} - OVN_HYBRID_OVERLAY_ENABLE=${OVN_HYBRID_OVERLAY_ENABLE:-false} OVN_DISABLE_SNAT_MULTIPLE_GWS=${OVN_DISABLE_SNAT_MULTIPLE_GWS:-false} OVN_DISABLE_FORWARDING=${OVN_DISABLE_FORWARDING:=false} OVN_ENCAP_PORT=${OVN_ENCAP_PORT:-""} OVN_DISABLE_PKT_MTU_CHECK=${OVN_DISABLE_PKT_MTU_CHECK:-false} - OVN_EMPTY_LB_EVENTS=${OVN_EMPTY_LB_EVENTS:-false} - OVN_MULTICAST_ENABLE=${OVN_MULTICAST_ENABLE:-false} KIND_ALLOW_SYSTEM_WRITES=${KIND_ALLOW_SYSTEM_WRITES:-false} - OVN_IMAGE=${OVN_IMAGE:-local} - OVN_REPO=${OVN_REPO:-""} - OVN_GITREF=${OVN_GITREF:-""} + MASTER_LOG_LEVEL=${MASTER_LOG_LEVEL:-5} NODE_LOG_LEVEL=${NODE_LOG_LEVEL:-5} DBCHECKER_LOG_LEVEL=${DBCHECKER_LOG_LEVEL:-5} @@ -635,67 +539,14 @@ set_default_params() { if [ "$OVN_ENABLE_EX_GW_NETWORK_BRIDGE" == true ]; then OVN_EX_GW_NETWORK_INTERFACE="eth1" fi - MULTI_POD_SUBNET=${MULTI_POD_SUBNET:-false} - # Input not currently validated. Modify outside script at your own risk. - # These are the same values defaulted to in KIND code (kind/default.go). 
- # NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host - # so it needs to use a larger subnet - # Upstream - NET_CIDR_IPV6=fd00:10:244::/64 SVC_CIDR_IPV6=fd00:10:96::/112 - NET_CIDR_IPV4=${NET_CIDR_IPV4:-10.244.0.0/16} - NET_CIDR_IPV6=${NET_CIDR_IPV6:-fd00:10:244::/48} - if [ "$MULTI_POD_SUBNET" == true ]; then - NET_CIDR_IPV4="10.243.0.0/23/24,10.244.0.0/16" - NET_CIDR_IPV6="fd00:10:243::/63/64,fd00:10:244::/48" - fi - NET_SECOND_CIDR_IPV4=${NET_SECOND_CIDR_IPV4:-172.19.0.0/16} - SVC_CIDR_IPV4=${SVC_CIDR_IPV4:-10.96.0.0/16} - SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} - JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16} - JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64} - MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/17} - MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112} - TRANSIT_SUBNET_IPV4=${TRANSIT_SUBNET_IPV4:-100.88.0.0/16} - TRANSIT_SUBNET_IPV6=${TRANSIT_SUBNET_IPV6:-fd97::/64} - METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16} - METALLB_CLIENT_NET_SUBNET_IPV6=${METALLB_CLIENT_NET_SUBNET_IPV6:-fc00:f853:ccd:e792::/64} - BGP_SERVER_NET_SUBNET_IPV4=${BGP_SERVER_NET_SUBNET_IPV4:-172.26.0.0/16} - BGP_SERVER_NET_SUBNET_IPV6=${BGP_SERVER_NET_SUBNET_IPV6:-fc00:f853:ccd:e796::/64} - KIND_NUM_MASTER=1 - OVN_ENABLE_INTERCONNECT=${OVN_ENABLE_INTERCONNECT:-true} OVN_ENABLE_OVNKUBE_IDENTITY=${OVN_ENABLE_OVNKUBE_IDENTITY:-true} - OVN_NETWORK_QOS_ENABLE=${OVN_NETWORK_QOS_ENABLE:-false} - - - if [ "$OVN_COMPACT_MODE" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != false ]; then - echo "Compact mode cannot be used together with Interconnect" - exit 1 - fi - - if [ "$OVN_HA" == true ]; then - KIND_NUM_MASTER=3 - KIND_NUM_WORKER=${KIND_NUM_WORKER:-0} - else - KIND_NUM_WORKER=${KIND_NUM_WORKER:-2} - fi KIND_NUM_INFRA=${KIND_NUM_INFRA:-0} KIND_INSTALL_PROMETHEUS=${KIND_INSTALL_PROMETHEUS:-false} - if [ "$OVN_ENABLE_INTERCONNECT" == true ]; then - KIND_NUM_NODES_PER_ZONE=${KIND_NUM_NODES_PER_ZONE:-1} - - TOTAL_NODES=$((KIND_NUM_WORKER + KIND_NUM_MASTER)) - if [[ ${KIND_NUM_NODES_PER_ZONE} -gt 1 ]] && [[ $((TOTAL_NODES % KIND_NUM_NODES_PER_ZONE)) -ne 0 ]]; then - echo "(Total k8s nodes / number of nodes per zone) should be zero" - exit 1 - fi - fi - OVN_HOST_NETWORK_NAMESPACE=${OVN_HOST_NETWORK_NAMESPACE:-ovn-host-network} OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} - METRICS_IP=${METRICS_IP:-""} - OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-identity ovnkube-zone-controller ovnkube-control-plane ovnkube-master ovnkube-node"} OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} @@ -707,61 +558,6 @@ set_default_params() { if [ "$OVN_DUMMY_GATEWAY_BRIDGE" == true ]; then OVN_GATEWAY_OPTS="--allow-no-uplink --gateway-interface=br-ex" fi - ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} - ENABLE_NETWORK_SEGMENTATION=${ENABLE_NETWORK_SEGMENTATION:-false} - if [ "$ENABLE_NETWORK_SEGMENTATION" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then - echo "Network segmentation (UDN) requires multi-network to be enabled (-mne)" - exit 1 - fi - - ENABLE_ROUTE_ADVERTISEMENTS=${ENABLE_ROUTE_ADVERTISEMENTS:-false} - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$ENABLE_MULTI_NET" != true ]; then - echo "Route advertisements requires multi-network to be enabled (-mne)" - exit 1 - fi - if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ] && [ "$OVN_ENABLE_INTERCONNECT" != true ]; then - echo "Route advertisements requires interconnect to 
be enabled (-ic)" - exit 1 - fi - - ENABLE_EVPN=${ENABLE_EVPN:-false} - if [ "$ENABLE_EVPN" == true ] && [ "$ENABLE_ROUTE_ADVERTISEMENTS" != true ]; then - echo "EVPN requires Route advertisements to be enabled (-rae)" - exit 1 - fi - - ENABLE_PRE_CONF_UDN_ADDR=${ENABLE_PRE_CONF_UDN_ADDR:-false} - if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Preconfigured UDN addresses requires network-segmentation to be enabled (-nse)" - exit 1 - fi - if [[ $ENABLE_PRE_CONF_UDN_ADDR == true && $OVN_ENABLE_INTERCONNECT != true ]]; then - echo "Preconfigured UDN addresses requires interconnect to be enabled (-ic)" - exit 1 - fi - - ENABLE_NETWORK_CONNECT=${ENABLE_NETWORK_CONNECT:-false} - if [[ $ENABLE_NETWORK_CONNECT == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Network connect requires network-segmentation to be enabled (-nse)" - exit 1 - fi - - DYNAMIC_UDN_ALLOCATION=${DYNAMIC_UDN_ALLOCATION:-false} - if [[ $DYNAMIC_UDN_ALLOCATION == true && $ENABLE_NETWORK_SEGMENTATION != true ]]; then - echo "Dynamic UDN allocation requires network-segmentation to be enabled (-nse)" - exit 1 - fi - DYNAMIC_UDN_GRACE_PERIOD=${DYNAMIC_UDN_GRACE_PERIOD:-120s} - ADVERTISED_UDN_ISOLATION_MODE=${ADVERTISED_UDN_ISOLATION_MODE:-strict} - ADVERTISE_DEFAULT_NETWORK=${ADVERTISE_DEFAULT_NETWORK:-false} - OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} - if [ "$OVN_COMPACT_MODE" == true ]; then - KIND_NUM_WORKER=0 - fi - OVN_MTU=${OVN_MTU:-1400} - OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} - OVN_OBSERV_ENABLE=${OVN_OBSERV_ENABLE:-false} - ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} } check_ipv6() { @@ -885,74 +681,6 @@ scale_kind_cluster() { fi } -create_kind_cluster() { - # Output of the jinjanate command - KIND_CONFIG_LCL=${DIR}/kind-${KIND_CLUSTER_NAME}.yaml - - ovn_ip_family=${IP_FAMILY} \ - ovn_ha=${OVN_HA} \ - net_cidr="${KIND_CIDR}" \ - svc_cidr=${SVC_CIDR} \ - use_local_registy=${KIND_LOCAL_REGISTRY} \ - dns_domain=${KIND_DNS_DOMAIN} \ - ovn_num_master=${KIND_NUM_MASTER} \ - ovn_num_worker=${KIND_NUM_WORKER} \ - kind_num_infra=${KIND_NUM_INFRA} \ - cluster_log_level=${KIND_CLUSTER_LOGLEVEL:-4} \ - kind_local_registry_port=${KIND_LOCAL_REGISTRY_PORT} \ - kind_local_registry_name=${KIND_LOCAL_REGISTRY_NAME} \ - jinjanate "${KIND_CONFIG}" -o "${KIND_CONFIG_LCL}" - - # Create KIND cluster. For additional debug, add '--verbosity ': 0 None .. 3 Debug - if kind get clusters | grep "${KIND_CLUSTER_NAME}"; then - delete - fi - - if [[ "${KIND_LOCAL_REGISTRY}" == true ]]; then - create_local_registry - fi - - kind create cluster --name "${KIND_CLUSTER_NAME}" --kubeconfig "${KUBECONFIG}" --image "${KIND_IMAGE}":"${K8S_VERSION}" --config=${KIND_CONFIG_LCL} --retain - - cat "${KUBECONFIG}" -} - -set_ovn_image() { - # if we're using the local registry and still need to build, push to local registry - if [ "$KIND_LOCAL_REGISTRY" == true ];then - OVN_IMAGE="localhost:5000/ovn-daemonset-fedora:latest" - else - OVN_IMAGE="localhost/ovn-daemonset-fedora:dev" - fi -} - -build_ovn_image() { - local push_args="" - if [ "$OCI_BIN" == "podman" ]; then - # docker doesn't perform tls check by default only podman does, hence we need to disable it for podman. 
- push_args="--tls-verify=false" - fi - - if [ "$OVN_IMAGE" == local ]; then - set_ovn_image - - # Build image - make -C ${DIR}/../dist/images IMAGE="${OVN_IMAGE}" OVN_REPO="${OVN_REPO}" OVN_GITREF="${OVN_GITREF}" OCI_BIN="${OCI_BIN}" fedora-image - - # store in local registry - if [ "$KIND_LOCAL_REGISTRY" == true ];then - echo "Pushing built image to local $OCI_BIN registry" - $OCI_BIN push $push_args "$OVN_IMAGE" - fi - # We should push to local registry if image is not remote - elif [ "${OVN_IMAGE}" != "" -a "${KIND_LOCAL_REGISTRY}" == true ] && (echo "$OVN_IMAGE" | grep / -vq); then - local local_registry_ovn_image="localhost:5000/${OVN_IMAGE}" - $OCI_BIN tag "$OVN_IMAGE" $local_registry_ovn_image - OVN_IMAGE=$local_registry_ovn_image - $OCI_BIN push $push_args "$OVN_IMAGE" - fi -} - create_ovn_kube_manifests() { local ovnkube_image=${OVN_IMAGE} if [ "$KIND_LOCAL_REGISTRY" == true ];then @@ -1022,6 +750,7 @@ create_ovn_kube_manifests() { --evpn-enable="${ENABLE_EVPN}" \ --advertise-default-network="${ADVERTISE_DEFAULT_NETWORK}" \ --advertised-udn-isolation-mode="${ADVERTISED_UDN_ISOLATION_MODE}" \ + --no-overlay-enable="${ENABLE_NO_OVERLAY}" \ --ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}" \ --metrics-ip="${METRICS_IP}" \ --compact-mode="${OVN_COMPACT_MODE}" \ @@ -1032,15 +761,10 @@ create_ovn_kube_manifests() { --network-qos-enable="${OVN_NETWORK_QOS_ENABLE}" \ --mtu="${OVN_MTU}" \ --enable-dnsnameresolver="${OVN_ENABLE_DNSNAMERESOLVER}" \ - --mtu="${OVN_MTU}" \ --enable-observ="${OVN_OBSERV_ENABLE}" popd } -install_ovn_image() { - install_image ${OVN_IMAGE} -} - install_ovn_global_zone() { if [ "$OVN_HA" == true ]; then run_kubectl apply -f ovnkube-db-raft.yaml @@ -1131,20 +855,12 @@ install_ovn() { run_kubectl apply -f rbac-ovnkube-master.yaml run_kubectl apply -f rbac-ovnkube-node.yaml run_kubectl apply -f rbac-ovnkube-db.yaml - MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") - # We want OVN HA not Kubernetes HA - # leverage the kubeadm well-known label node-role.kubernetes.io/control-plane= - # to choose the nodes where ovn master components will be placed - for n in $MASTER_NODES; do - kubectl label node "$n" k8s.ovn.org/ovnkube-db=true node-role.kubernetes.io/control-plane="" --overwrite - if [ "$KIND_REMOVE_TAINT" == true ]; then - # do not error if it fails to remove the taint - # remove both master and control-plane taints until master is removed from 1.25 - # // https://github.com/kubernetes/kubernetes/pull/107533 - kubectl taint node "$n" node-role.kubernetes.io/master:NoSchedule- || true - kubectl taint node "$n" node-role.kubernetes.io/control-plane:NoSchedule- || true - fi - done + if [ "${OVN_HA}" == "true" ]; then + label_ovn_ha + fi + if [ "$KIND_REMOVE_TAINT" == true ]; then + remove_no_schedule_taint + fi run_kubectl apply -f ovs-node.yaml @@ -1279,7 +995,7 @@ add_dns_hostnames() { done } -check_dependencies +check_common_dependencies # In order to allow providing arguments with spaces, e.g. 
"-vconsole:info -vfile:info" # the original command was replaced by parse_args "$@" @@ -1372,7 +1088,7 @@ if [ "$KIND_INSTALL_KUBEVIRT" == true ]; then fi fi if [ "$ENABLE_ROUTE_ADVERTISEMENTS" == true ]; then - install_ffr_k8s + install_frr_k8s fi interconnect_arg_check diff --git a/contrib/kind.yaml.j2 b/contrib/kind.yaml.j2 index 0f987d30bc..ee6d719ea9 100644 --- a/contrib/kind.yaml.j2 +++ b/contrib/kind.yaml.j2 @@ -14,7 +14,7 @@ networking: {%- if ovn_ip_family %} ipFamily: {{ ovn_ip_family }} {%- endif %} -{%- if use_local_registy == "true"%} +{%- if use_local_registry == "true"%} containerdConfigPatches: - |- [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:{{ kind_local_registry_port }}"] diff --git a/contrib/perf/generate_perf_report.py b/contrib/perf/generate_perf_report.py index 7139033289..0e80018f65 100644 --- a/contrib/perf/generate_perf_report.py +++ b/contrib/perf/generate_perf_report.py @@ -18,9 +18,10 @@ class MetricsProcessor: """Process and analyze metrics data from JSON files.""" - def __init__(self, metrics_dir: str = "."): + def __init__(self, metrics_dir: str = ".", workload: str = "kubelet-density-cni"): + self.workload = workload self.metrics_dir = metrics_dir - self.pod_latency_file = "podLatencyMeasurement-kubelet-density-cni.json" + self.pod_latency_file = f"podLatencyMeasurement-{self.workload}.json" self.container_cpu_file = "containerCPU.json" self.container_memory_file = "containerMemory.json" @@ -139,11 +140,12 @@ def get_container_type_from_container(self, container_name: str, pod_name: str) class ReportGenerator: """Generate text report from processed metrics data.""" - def __init__(self, title: str = "Kubernetes Workload Metrics Report"): + def __init__(self, title: str = "Kubernetes Workload Metrics Report", workload: str = "kubelet-density-cni"): self.title = title + self.workload = workload def generate_report(self, pod_latency: Dict[str, Any], ovn_cpu: Dict[str, Any], - ovn_memory: Dict[str, Any]) -> str: + ovn_memory: Dict[str, Any] ) -> str: """Generate complete text report.""" stats = pod_latency['stats'] @@ -151,7 +153,7 @@ def generate_report(self, pod_latency: Dict[str, Any], ovn_cpu: Dict[str, Any], # Header report_lines.append("# 📊 Kubernetes Workload Metrics Report") - report_lines.append("## kubelet-density-cni Performance Results") + report_lines.append(f"## {self.workload} Performance Results") report_lines.append("") report_lines.append(f"**Generated on:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}") report_lines.append("") @@ -291,6 +293,8 @@ def detect_pr_environment() -> Optional[str]: def main(): """Main function to generate the performance report.""" parser = argparse.ArgumentParser(description='Generate Kubernetes workload metrics report') + parser.add_argument('--workload', default='kubelet-density-cni', + help='Workload name (default: kubelet-density-cni)') parser.add_argument('--metrics-dir', default='.', help='Directory containing JSON metrics files (default: current directory)') parser.add_argument('--output', default='performance_report.md', @@ -310,8 +314,8 @@ def main(): print() # Initialize processor and generator - processor = MetricsProcessor(args.metrics_dir) - generator = ReportGenerator(args.title) + processor = MetricsProcessor(args.metrics_dir, args.workload) + generator = ReportGenerator(args.title, args.workload) # Load and process data print("📊 Loading and processing metrics data...") diff --git a/contrib/perf/workloads/cudn-density-l2-noPods.yml 
b/contrib/perf/workloads/cudn-density-l2-noPods.yml new file mode 100644 index 0000000000..401199555a --- /dev/null +++ b/contrib/perf/workloads/cudn-density-l2-noPods.yml @@ -0,0 +1,46 @@ +--- +global: + measurements: + - name: podLatency + - name: pprof + pprofInterval: 1m + pprofDirectory: pprof-data + pprofTargets: + - name: ovnkube-controller + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/profile?seconds=30 + - name: ovnkube-control-plane + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/profile?seconds=30 + - name: ovnkube-controller-heap + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/heap?seconds=30 + - name: ovnkube-control-plane-heap + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/heap?seconds=30 +jobs: + - name: cudn-density-l2-nopods + jobIterations: 200 + qps: 10 + burst: 10 + namespacedIterations: true + namespace: cudn-density-l2-nopods + waitWhenFinished: true + podWait: false + preLoadImages: false + preLoadPeriod: 2m + # Disabling churn until https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5883 is resolved + #churnConfig: + # percent: 10 + # cycles: 10 + # mode: objects + objects: + - objectTemplate: workloads/templates/udn-density/cudn_l2.yml + replicas: 1 + - objectTemplate: workloads/templates/udn-density/cudn_ns.yml + replicas: 1 + diff --git a/contrib/perf/workloads/kubelet-density-cni.yml b/contrib/perf/workloads/kubelet-density-cni.yml index 7d97f60579..dd3f193343 100644 --- a/contrib/perf/workloads/kubelet-density-cni.yml +++ b/contrib/perf/workloads/kubelet-density-cni.yml @@ -39,11 +39,11 @@ jobs: mode: objects objects: - - objectTemplate: templates/webserver-deployment.yml + - objectTemplate: workloads/templates/kubelet-density-cni/webserver-deployment.yml replicas: 1 - - objectTemplate: templates/webserver-service.yml + - objectTemplate: workloads/templates/kubelet-density-cni/webserver-service.yml replicas: 1 - - objectTemplate: templates/curl-deployment.yml + - objectTemplate: workloads/templates/kubelet-density-cni/curl-deployment.yml replicas: 1 diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml b/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml new file mode 100644 index 0000000000..d437769475 --- /dev/null +++ b/contrib/perf/workloads/templates/kubelet-density-cni/curl-deployment.yml @@ -0,0 +1,40 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: curl-{{.Replica}}-{{.Iteration}} +spec: + template: + metadata: + labels: + name: curl-{{.Replica}}-{{.Iteration}} + spec: + nodeSelector: + node-role.kubernetes.io/worker: "" + containers: + - name: curlapp + image: quay.io/cloud-bulldozer/curl:latest + command: ["sleep", "inf"] + env: + - name: WEBSERVER_HOSTNAME + value: webserver-{{.Replica}}-{{.Iteration}} + - name: WEBSERVER_PORT + value: "8080" + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + startupProbe: + exec: + command: + - "/bin/sh" + - "-c" + - "curl ${WEBSERVER_HOSTNAME}:${WEBSERVER_PORT}" + periodSeconds: 1 + timeoutSeconds: 1 + failureThreshold: 600 + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: curl-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml 
b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml new file mode 100644 index 0000000000..b9c904154a --- /dev/null +++ b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-deployment.yml @@ -0,0 +1,28 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + template: + metadata: + labels: + name: webserver-{{.Replica}}-{{.Iteration}} + spec: + nodeSelector: + node-role.kubernetes.io/worker: "" + containers: + - name: webserver + image: quay.io/cloud-bulldozer/sampleapp:latest + ports: + - containerPort: 8080 + protocol: TCP + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: webserver-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml new file mode 100644 index 0000000000..a569151b82 --- /dev/null +++ b/contrib/perf/workloads/templates/kubelet-density-cni/webserver-service.yml @@ -0,0 +1,12 @@ +kind: Service +apiVersion: v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + selector: + name: webserver-{{.Replica}}-{{.Iteration}} + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 + type: ClusterIP diff --git a/contrib/perf/workloads/templates/udn-density/cudn_l2.yml b/contrib/perf/workloads/templates/udn-density/cudn_l2.yml new file mode 100644 index 0000000000..81be6ac9da --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/cudn_l2.yml @@ -0,0 +1,14 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: ClusterUserDefinedNetwork +metadata: + name: l2-network-{{.Iteration}} +spec: + namespaceSelector: + matchLabels: + cudn-scale: "{{.JobName}}-{{.Iteration}}" + network: + topology: Layer2 + layer2: + role: Primary + subnets: ["10.132.0.0/16"] \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/cudn_ns.yml b/contrib/perf/workloads/templates/udn-density/cudn_ns.yml new file mode 100644 index 0000000000..8f6387a459 --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/cudn_ns.yml @@ -0,0 +1,8 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: "{{.JobName}}-{{.Iteration}}" + labels: + k8s.ovn.org/primary-user-defined-network: "" + cudn-scale: "{{.JobName}}-{{.Iteration}}" \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/deployment-client.yml b/contrib/perf/workloads/templates/udn-density/deployment-client.yml new file mode 100644 index 0000000000..d153adfabe --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/deployment-client.yml @@ -0,0 +1,57 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: client-{{.Replica}} +spec: + replicas: 1 + selector: + matchLabels: + name: client-{{.Replica}} + template: + metadata: + labels: + name: client-{{.Replica}} + app: client + spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: client + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + - key: node-role.kubernetes.io/infra + operator: DoesNotExist + - key: node-role.kubernetes.io/workload + operator: DoesNotExist + containers: + - name: client-app + image: 
quay.io/cloud-bulldozer/curl:latest + command: ["sleep", "inf"] + resources: + requests: + memory: "10Mi" + cpu: "10m" + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + volumeMounts: + - name: podinfo + mountPath: /etc/podlabels + volumes: + - name: podinfo + downwardAPI: + items: + - path: "labels" + fieldRef: + fieldPath: metadata.labels + restartPolicy: Always + strategy: + type: RollingUpdate diff --git a/contrib/perf/workloads/templates/udn-density/udn_l2.yml b/contrib/perf/workloads/templates/udn-density/udn_l2.yml new file mode 100644 index 0000000000..fe0c222dd6 --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/udn_l2.yml @@ -0,0 +1,10 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: UserDefinedNetwork +metadata: + name: l2-network-{{.Iteration}} +spec: + topology: Layer2 + layer2: + role: Primary + subnets: ["10.132.0.0/16"] \ No newline at end of file diff --git a/contrib/perf/workloads/templates/udn-density/udn_l3.yml b/contrib/perf/workloads/templates/udn-density/udn_l3.yml new file mode 100644 index 0000000000..0a8de7688d --- /dev/null +++ b/contrib/perf/workloads/templates/udn-density/udn_l3.yml @@ -0,0 +1,13 @@ +--- +apiVersion: k8s.ovn.org/v1 +kind: UserDefinedNetwork +metadata: + name: l3-network-{{.Iteration}} +spec: + topology: Layer3 + layer3: + role: Primary + subnets: + - cidr: 10.132.0.0/16 + hostSubnet: 24 + mtu: 1300 diff --git a/contrib/perf/workloads/udn-density-l2-noPods.yml b/contrib/perf/workloads/udn-density-l2-noPods.yml new file mode 100644 index 0000000000..310f3c87dd --- /dev/null +++ b/contrib/perf/workloads/udn-density-l2-noPods.yml @@ -0,0 +1,48 @@ +--- +global: + measurements: + - name: podLatency + - name: pprof + pprofInterval: 1m + pprofDirectory: pprof-data + pprofTargets: + - name: ovnkube-controller + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/profile?seconds=30 + - name: ovnkube-control-plane + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/profile?seconds=30 + - name: ovnkube-controller-heap + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/heap?seconds=30 + - name: ovnkube-control-plane-heap + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/heap?seconds=30 +jobs: + - name: udn-density-l2-nopods + jobIterations: 200 + qps: 10 + burst: 10 + namespacedIterations: true + namespace: udn-density-l2-nopods + waitWhenFinished: true + podWait: false + preLoadImages: false + preLoadPeriod: 2m + churnConfig: + percent: 10 + cycles: 5 + delay: 2m + namespaceLabels: + security.openshift.io/scc.podSecurityLabelSync: false + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged + k8s.ovn.org/primary-user-defined-network: "" + objects: + - objectTemplate: workloads/templates/udn-density/udn_l2.yml + replicas: 1 diff --git a/contrib/perf/workloads/udn-density-l2-pods.yml b/contrib/perf/workloads/udn-density-l2-pods.yml new file mode 100644 index 0000000000..d6ba0170cd --- /dev/null +++ b/contrib/perf/workloads/udn-density-l2-pods.yml @@ -0,0 +1,50 @@ +--- +global: + measurements: + - name: podLatency + - name: pprof + pprofInterval: 1m + pprofDirectory: pprof-data + pprofTargets: + - name: ovnkube-controller + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: 
http://localhost:9410/debug/pprof/profile?seconds=30 + - name: ovnkube-control-plane + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/profile?seconds=30 + - name: ovnkube-controller-heap + namespace: "ovn-kubernetes" + labelSelector: {app: ovnkube-node} + url: http://localhost:9410/debug/pprof/heap?seconds=30 + - name: ovnkube-control-plane-heap + namespace: "ovn-kubernetes" + labelSelector: {name: ovnkube-control-plane} + url: http://localhost:9411/debug/pprof/heap?seconds=30 +jobs: + - name: udn-density-l2-pods + jobIterations: 100 + qps: 10 + burst: 10 + namespacedIterations: true + namespace: udn-density-l2-pods + waitWhenFinished: true + podWait: false + preLoadImages: false + preLoadPeriod: 2m + churnConfig: + percent: 10 + cycles: 5 + delay: 2m + namespaceLabels: + security.openshift.io/scc.podSecurityLabelSync: false + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged + k8s.ovn.org/primary-user-defined-network: "" + objects: + - objectTemplate: workloads/templates/udn-density/udn_l2.yml + replicas: 1 + - objectTemplate: workloads/templates/udn-density/deployment-client.yml + replicas: 1 diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index ecdc231785..4430d29143 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -69,7 +69,7 @@ OVN_EGRESSFIREWALL_ENABLE= OVN_EGRESSQOS_ENABLE= OVN_EGRESSSERVICE_ENABLE= OVN_MULTI_NETWORK_ENABLE= -OVN_NETWORK_SEGMENTATION_ENABLE= +OVN_NETWORK_SEGMENTATION_ENABLE="false" OVN_NETWORK_CONNECT_ENABLE= OVN_PRE_CONF_UDN_ADDR_ENABLE= OVN_DYNAMIC_UDN_ALLOCATION= @@ -78,6 +78,7 @@ OVN_ROUTE_ADVERTISEMENTS_ENABLE= OVN_EVPN_ENABLE= OVN_ADVERTISE_DEFAULT_NETWORK= OVN_ADVERTISED_UDN_ISOLATION_MODE= +OVN_NO_OVERLAY_ENABLE= OVN_V4_JOIN_SUBNET="" OVN_V6_JOIN_SUBNET="" OVN_V4_MASQUERADE_SUBNET="" @@ -93,6 +94,7 @@ OVN_IPFIX_CACHE_ACTIVE_TIMEOUT="" OVN_HOST_NETWORK_NAMESPACE="" OVN_EX_GW_NETWORK_INTERFACE="" OVNKUBE_NODE_MGMT_PORT_NETDEV="" +OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME="" OVNKUBE_CONFIG_DURATION_ENABLE= OVNKUBE_METRICS_SCALE_ENABLE= OVN_STATELESS_NETPOL_ENABLE="false" @@ -301,6 +303,9 @@ while [ "$1" != "" ]; do --advertised-udn-isolation-mode) OVN_ADVERTISED_UDN_ISOLATION_MODE=$VALUE ;; + --no-overlay-enable) + OVN_NO_OVERLAY_ENABLE=$VALUE + ;; --egress-service-enable) OVN_EGRESSSERVICE_ENABLE=$VALUE ;; @@ -352,6 +357,9 @@ while [ "$1" != "" ]; do --ovnkube-node-mgmt-port-dp-resource-name) OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME=$VALUE ;; + --mgmt-port-vfs-count) + MGMT_PORT_VFS_COUNT=$VALUE + ;; --ovnkube-config-duration-enable) OVNKUBE_CONFIG_DURATION_ENABLE=$VALUE ;; @@ -403,9 +411,33 @@ while [ "$1" != "" ]; do --no-hostsubnet-label) OVN_NOHOSTSUBNET_LABEL=$VALUE ;; - --ovn_disable_requestedchassis) + --ovn-disable-requestedchassis) OVN_DISABLE_REQUESTEDCHASSIS=$value ;; + --metrics-port) + METRICS_PORT=$value + ;; + --dpuhost-cluster-net-cidr) + DPUHOST_CLUSTER_NET_CIDR=$value + ;; + --dpuhost-cluster-svc-cidr) + DPUHOST_CLUSTER_SVC_CIDR=$value + ;; + --dpuhost-cluster-k8s-apiserver) + DPUHOST_CLUSTER_K8S_APISERVER=$value + ;; + --dpuhost-cluster-k8s-token) + DPUHOST_CLUSTER_K8S_TOKEN=$value + ;; + --dpuhost-cluster-k8s-cacert-data) + DPUHOST_CLUSTER_K8S_CACERT_DATA=$value + ;; + --dpuhost-cluster-k8s-token-file) + DPUHOST_CLUSTER_K8S_TOKEN_FILE=$value + ;; + --dpuhost-cluster-k8s-cacert) + DPUHOST_CLUSTER_K8S_CACERT=$value + ;; *) echo "WARNING: unknown 
parameter \"$PARAM\"" exit 1 @@ -506,6 +538,8 @@ ovn_advertise_default_network=${OVN_ADVERTISE_DEFAULT_NETWORK} echo "ovn_advertise_default_network: ${ovn_advertise_default_network}" ovn_advertised_udn_isolation_mode=${OVN_ADVERTISED_UDN_ISOLATION_MODE} echo "ovn_advertised_udn_isolation_mode: ${ovn_advertised_udn_isolation_mode}" +ovn_no_overlay_enable=${OVN_NO_OVERLAY_ENABLE} +echo "ovn_no_overlay_enable: ${ovn_no_overlay_enable}" ovn_hybrid_overlay_net_cidr=${OVN_HYBRID_OVERLAY_NET_CIDR} echo "ovn_hybrid_overlay_net_cidr: ${ovn_hybrid_overlay_net_cidr}" ovn_disable_snat_multiple_gws=${OVN_DISABLE_SNAT_MULTIPLE_GWS} @@ -578,6 +612,10 @@ ovn_ex_gw_networking_interface=${OVN_EX_GW_NETWORK_INTERFACE} echo "ovn_ex_gw_networking_interface: ${ovn_ex_gw_networking_interface}" ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV} echo "ovnkube_node_mgmt_port_netdev: ${ovnkube_node_mgmt_port_netdev}" +ovnkube_node_mgmt_port_dp_resource_name=${OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME} +echo "ovnkube_node_mgmt_port_dp_resource_name: ${ovnkube_node_mgmt_port_dp_resource_name}" +mgmt_port_vfs_count=${MGMT_PORT_VFS_COUNT:-1} +echo "mgmt_port_vfs_count: ${mgmt_port_vfs_count}" ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE} echo "ovnkube_config_duration_enable: ${ovnkube_config_duration_enable}" ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE} @@ -662,6 +700,7 @@ ovn_image=${ovnkube_image} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ @@ -722,6 +761,7 @@ ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ @@ -787,9 +827,14 @@ ovn_image=${image} \ ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ + ovnkube_node_mgmt_port_dp_resource_name=${ovnkube_node_mgmt_port_dp_resource_name} \ + mgmt_port_vfs_count=${mgmt_port_vfs_count} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_network_qos_enable=${ovn_network_qos_enable} \ metrics_ip=${metrics_ip} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ + ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovnkube_app_name=ovnkube-node-dpu-host \ jinjanate ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node-dpu-host.yaml @@ -830,6 +875,7 @@ ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ @@ -888,6 +934,7 @@ 
ovn_image=${ovnkube_image} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_enable_dynamic_udn_allocation=${ovn_enable_dynamic_udn_allocation} \ ovn_dynamic_udn_grace_period=${ovn_dynamic_udn_grace_period} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ @@ -976,6 +1023,7 @@ ovn_image=${ovnkube_image} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ ovn_egress_service_enable=${ovn_egress_service_enable} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_remote_probe_interval=${ovn_remote_probe_interval} \ @@ -1012,6 +1060,102 @@ ovn_image=${ovnkube_image} \ enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml +# ovnkube-single-node-zone-dpu +dpuhost_cluster_net_cidr=${DPUHOST_CLUSTER_NET_CIDR:-"10.244.0.0/16/24"} +dpuhost_cluster_svc_cidr=${DPUHOST_CLUSTER_SVC_CIDR:-"10.96.0.0/16"} +dpuhost_cluster_k8s_apiserver=${DPUHOST_CLUSTER_K8S_APISERVER:-"https://172.25.0.2:6443"} +dpuhost_cluster_k8s_token=${DPUHOST_CLUSTER_K8S_TOKEN:-""} +dpuhost_cluster_k8s_cacert_data=${DPUHOST_CLUSTER_K8S_CACERT_DATA:-""} +dpuhost_cluster_k8s_token_file=${DPUHOST_CLUSTER_K8S_TOKEN_FILE:-""} +dpuhost_cluster_k8s_cacert=${DPUHOST_CLUSTER_K8S_CACERT:-""} +mtu=${OVN_MTU:-1400} +metrics_port=${METRICS_PORT:-9476} +echo "dpuhost_cluster_net_cidr: ${dpuhost_cluster_net_cidr}" +echo "dpuhost_cluster_svc_cidr: ${dpuhost_cluster_svc_cidr}" +echo "dpuhost_cluster_k8s_apiserver: ${dpuhost_cluster_k8s_apiserver}" +echo "dpuhost_cluster_k8s_token: ${dpuhost_cluster_k8s_token}" +echo "dpuhost_cluster_k8s_cacert_data: ${dpuhost_cluster_k8s_cacert_data}" +echo "dpuhost_cluster_k8s_token_file: ${dpuhost_cluster_k8s_token_file}" +echo "dpuhost_cluster_k8s_cacert: ${dpuhost_cluster_k8s_cacert}" +echo "mtu: ${mtu}" +echo "metrics_port: ${metrics_port}" + +ovn_image=${ovnkube_image} \ + ovn_image_pull_policy=${image_pull_policy} \ + ovn_unprivileged_mode=${ovn_unprivileged_mode} \ + ovn_gateway_mode=${ovn_gateway_mode} \ + ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_loglevel_nb=${ovn_loglevel_nb} ovn_loglevel_sb=${ovn_loglevel_sb} \ + ovn_northd_backoff_interval=${ovn_northd_backoff_interval} \ + ovn_loglevel_northd=${ovn_loglevel_northd} \ + ovnkube_node_loglevel=${node_loglevel} \ + ovn_loglevel_controller=${ovn_loglevel_controller} \ + ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ + ovnkube_logfile_maxbackups=${ovnkube_logfile_maxbackups} \ + ovnkube_logfile_maxage=${ovnkube_logfile_maxage} \ + ovnkube_libovsdb_client_logfile=${ovnkube_libovsdb_client_logfile} \ + ovnkube_config_duration_enable=${ovnkube_config_duration_enable} \ + ovnkube_metrics_scale_enable=${ovnkube_metrics_scale_enable} \ + metrics_ip=${metrics_ip} \ + ovn_hybrid_overlay_net_cidr=${ovn_hybrid_overlay_net_cidr} \ + ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \ + ovn_disable_snat_multiple_gws=${ovn_disable_snat_multiple_gws} \ + ovn_disable_forwarding=${ovn_disable_forwarding} \ + ovn_encap_port=${ovn_encap_port} \ + ovn_disable_pkt_mtu_check=${ovn_disable_pkt_mtu_check} \ + ovn_v4_join_subnet=${ovn_v4_join_subnet} \ + ovn_v6_join_subnet=${ovn_v6_join_subnet} \ + ovn_v4_masquerade_subnet=${ovn_v4_masquerade_subnet} \ + 
ovn_v6_masquerade_subnet=${ovn_v6_masquerade_subnet} \
+  ovn_multicast_enable=${ovn_multicast_enable} \
+  ovn_admin_network_policy_enable=${ovn_admin_network_policy_enable} \
+  ovn_egress_ip_enable=${ovn_egress_ip_enable} \
+  ovn_egress_ip_healthcheck_port=${ovn_egress_ip_healthcheck_port} \
+  ovn_egress_firewall_enable=${ovn_egress_firewall_enable} \
+  ovn_egress_qos_enable=${ovn_egress_qos_enable} \
+  ovn_multi_network_enable=${ovn_multi_network_enable} \
+  ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \
+  ovn_network_connect_enable=${ovn_network_connect_enable} \
+  ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \
+  ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \
+  ovn_egress_service_enable=${ovn_egress_service_enable} \
+  ovn_ssl_en=${ovn_ssl_en} \
+  ovn_remote_probe_interval=${ovn_remote_probe_interval} \
+  ovn_monitor_all=${ovn_monitor_all} \
+  ovn_ofctrl_wait_before_clear=${ovn_ofctrl_wait_before_clear} \
+  ovn_enable_lflow_cache=${ovn_enable_lflow_cache} \
+  ovn_lflow_cache_limit=${ovn_lflow_cache_limit} \
+  ovn_lflow_cache_limit_kb=${ovn_lflow_cache_limit_kb} \
+  ovn_netflow_targets=${ovn_netflow_targets} \
+  ovn_sflow_targets=${ovn_sflow_targets} \
+  ovn_ipfix_targets=${ovn_ipfix_targets} \
+  ovn_ipfix_sampling=${ovn_ipfix_sampling} \
+  ovn_ipfix_cache_max_flows=${ovn_ipfix_cache_max_flows} \
+  ovn_ipfix_cache_active_timeout=${ovn_ipfix_cache_active_timeout} \
+  ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \
+  ovn_acl_logging_rate_limit=${ovn_acl_logging_rate_limit} \
+  ovn_empty_lb_events=${ovn_empty_lb_events} \
+  ovn_enable_interconnect=${ovn_enable_interconnect} \
+  ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \
+  ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \
+  ovn_network_qos_enable=${ovn_network_qos_enable} \
+  ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \
+  ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \
+  ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \
+  ovn_observ_enable=${ovn_observ_enable} \
+  ovn_no_overlay_enable=${ovn_no_overlay_enable} \
+  mtu_value=${mtu} \
+  metrics_port=${metrics_port} \
+  dpuhost_cluster_net_cidr=${dpuhost_cluster_net_cidr} \
+  dpuhost_cluster_svc_cidr=${dpuhost_cluster_svc_cidr} \
+  dpuhost_cluster_k8s_apiserver=${dpuhost_cluster_k8s_apiserver} \
+  dpuhost_cluster_k8s_token=${dpuhost_cluster_k8s_token} \
+  dpuhost_cluster_k8s_cacert_data=${dpuhost_cluster_k8s_cacert_data} \
+  dpuhost_cluster_k8s_token_file=${dpuhost_cluster_k8s_token_file} \
+  dpuhost_cluster_k8s_cacert=${dpuhost_cluster_k8s_cacert} \
+  jinjanate ../templates/ovnkube-single-node-zone-dpu.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone-dpu.yaml
+
 ovn_image=${ovnkube_image} \
   ovn_image_pull_policy=${image_pull_policy} \
   ovn_unprivileged_mode=${ovn_unprivileged_mode} \
@@ -1052,6 +1196,7 @@ ovn_image=${ovnkube_image} \
   ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \
   ovn_evpn_enable=${ovn_evpn_enable} \
   ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \
+  ovn_no_overlay_enable=${ovn_no_overlay_enable} \
   ovn_ssl_en=${ovn_ssl_en} \
   ovn_remote_probe_interval=${ovn_remote_probe_interval} \
   ovn_monitor_all=${ovn_monitor_all} \
@@ -1141,6 +1286,7 @@ net_cidr=${net_cidr} svc_cidr=${svc_cidr} \
   host_network_namespace=${host_network_namespace} \
   in_upgrade=${in_upgrade} \
advertise_default_network=${ovn_advertise_default_network} \ + ovn_no_overlay_enable=${ovn_no_overlay_enable} \ jinjanate ../templates/ovn-setup.yaml.j2 -o ${output_dir}/ovn-setup.yaml ovn_enable_interconnect=${ovn_enable_interconnect} \ @@ -1161,6 +1307,7 @@ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ +ovn_enable_interconnect=${ovn_enable_interconnect} \ jinjanate ../templates/rbac-ovnkube-master.yaml.j2 -o ${output_dir}/rbac-ovnkube-master.yaml cp ../templates/rbac-ovnkube-identity.yaml.j2 ${output_dir}/rbac-ovnkube-identity.yaml diff --git a/dist/images/ovndb-raft-functions.sh b/dist/images/ovndb-raft-functions.sh index 38396fed33..0db584a020 100644 --- a/dist/images/ovndb-raft-functions.sh +++ b/dist/images/ovndb-raft-functions.sh @@ -9,7 +9,7 @@ verify-ovsdb-raft() { exit 1 fi - replicas=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + replicas=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get statefulset -n ${ovn_kubernetes_namespace} ovnkube-db -o=jsonpath='{.spec.replicas}') if [[ ${replicas} -lt 3 || $((${replicas} % 2)) -eq 0 ]]; then echo "at least 3 nodes need to be configured, and it must be odd number of nodes" @@ -25,7 +25,7 @@ db_part_of_cluster() { local db=${2} local port=${3} echo "Checking if ${pod} is part of cluster" - init_ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + init_ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ${pod} -o=jsonpath='{.status.podIP}') if [[ $? != 0 ]]; then echo "Unable to get ${pod} ip " @@ -51,7 +51,7 @@ cluster_exists() { local db=${1} local port=${2} - db_pods=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + db_pods=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} -o=jsonpath='{.items[*].metadata.name}' | egrep -o 'ovnkube-db[^ ]+') for db_pod in $db_pods; do @@ -62,7 +62,7 @@ cluster_exists() { done # if we get here there is no cluster, set init_ip and get out - init_ip="$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + init_ip="$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ovnkube-db-0 -o=jsonpath='{.status.podIP}')" if [[ $? != 0 ]]; then return 1 @@ -89,7 +89,7 @@ check_and_apply_ovnkube_db_ep() { local port=${1} # return if ovn db service endpoint already exists - result=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + result=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get ep -n ${ovn_kubernetes_namespace} ovnkube-db 2>&1) test $? -eq 0 && return if ! 
echo ${result} | grep -q "NotFound"; then @@ -99,7 +99,7 @@ check_and_apply_ovnkube_db_ep() { # Get IPs of all ovnkube-db PODs ips=() for ((i = 0; i < ${replicas}; i++)); do - ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + ip=$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get pod -n ${ovn_kubernetes_namespace} ovnkube-db-${i} -o=jsonpath='{.status.podIP}') if [[ ${ip} == "" ]]; then break diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index aa81ecfa4e..8f01b2f458 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -44,6 +44,8 @@ fi # OVN_DAEMONSET_VERSION - version match daemonset and image - v1.2.0 # K8S_TOKEN - the apiserver token. Automatically detected when running in a pod - v3 # K8S_CACERT - the apiserver CA. Automatically detected when running in a pod - v3 +# K8S_TOKEN_FILE - the apiserver token file. Automatically detected when running in a pod - v3 +# K8S_CACERT_DATA - the apiserver CA data. # OVN_CONTROLLER_OPTS - the options for ovn-ctl # OVN_NORTHD_OPTS - the options for the ovn northbound db # OVN_GATEWAY_MODE - the gateway mode (shared or local) - v3 @@ -145,7 +147,7 @@ else fi # certs and private keys for k8s and OVN -K8S_CACERT=${K8S_CACERT:-/var/run/secrets/kubernetes.io/serviceaccount/ca.crt} +k8s_cacert=${K8S_CACERT:-/var/run/secrets/kubernetes.io/serviceaccount/ca.crt} ovn_ca_cert=/ovn-cert/ca-cert.pem ovn_nb_pk=/ovn-cert/ovnnb-privkey.pem @@ -331,7 +333,7 @@ ovn_observ_enable=${OVN_OBSERV_ENABLE:-false} # OVN_NOHOSTSUBNET_LABEL - node label indicating nodes managing their own network ovn_nohostsubnet_label=${OVN_NOHOSTSUBNET_LABEL:-""} # OVN_DISABLE_REQUESTEDCHASSIS - disable requested-chassis option during pod creation -# should be set to true when dpu nodes are in the cluster +# should be set to true when dpu nodes are in the cluster for OVN Central mode ovn_disable_requestedchassis=${OVN_DISABLE_REQUESTEDCHASSIS:-false} # external_ids:host-k8s-nodename is set on an Open_vSwitch enabled system if the ovnkube stack @@ -450,7 +452,7 @@ ready_to_start_node() { ovnkube_db_ep=$(get_ovnkube_zone_db_ep) echo "Getting the ${ovnkube_db_ep} ep" # See if ep is available ... - IFS=" " read -a ovn_db_hosts <<<"$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} \ + IFS=" " read -a ovn_db_hosts <<<"$(kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${k8s_cacert} \ get ep -n ${ovn_kubernetes_namespace} ${ovnkube_db_ep} -o=jsonpath='{range .subsets[0].addresses[*]}{.ip}{" "}')" if [[ ${#ovn_db_hosts[@]} == 0 ]]; then return 1 @@ -632,6 +634,32 @@ check_health() { return 1 } +get_dpu_gw_options() { + # If ovn_gateway_opts or ovn_gateway_router_subnet is not set as environment variable, gather them from ovs settings + if [[ ${ovn_gateway_opts} == "" ]]; then + # get the gateway interface + gw_iface=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-interface | tr -d \") + if [[ ${gw_iface} == "" ]]; then + echo "Couldn't get OVN Gateway Interface from ovs external_ids setting" + else + ovn_gateway_opts="--gateway-interface=${gw_iface} " + fi + + # get the gateway nexthop + gw_nexthop=$(ovs-vsctl --if-exists get Open_vSwitch . 
external_ids:ovn-gw-nexthop | tr -d \")
+    if [[ ${gw_nexthop} == "" ]]; then
+      echo "Couldn't get OVN Gateway NextHop from ovs external_ids setting"
+    else
+      ovn_gateway_opts+="--gateway-nexthop=${gw_nexthop} "
+    fi
+  fi
+
+  # this is only required if the DPU and DPU Host are in different subnets
+  if [[ ${ovn_gateway_router_subnet} == "" ]]; then
+    ovn_gateway_router_subnet=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-router-subnet | tr -d \")
+  fi
+}
+
 display_file() {
   if [[ -f $3 ]]; then
     echo "====================== $1 pid "
@@ -837,7 +865,7 @@ set_ovnkube_db_ep() {
   ovnkube_db_ep=$(get_ovnkube_zone_db_ep)
   echo "=============== setting ${ovnkube_db_ep} endpoints to ${ips[@]}"
   # create a new endpoint for the headless onvkube-db service without selectors
-  kubectl --server=${K8S_APISERVER} --token=${k8s_token} --certificate-authority=${K8S_CACERT} apply -f - <...
+   ```
+
+3. **Binary collection** happens during log export. The `export-kind-logs.sh` script
+   searches all containers for the crashed binary and copies it alongside the coredump.
+
+4. **Artifacts are uploaded** to GitHub Actions and can be downloaded from the job's
+   artifacts section.
+
+### Downloading Artifacts
+
+After a CI job completes, download the `kind-logs-*` artifact from the GitHub Actions
+job page. Extract it to find:
+
+```
+/tmp/kind/logs/coredumps/
+├── core.29132.ovnkube.ovn-worker.6   # Coredump file
+└── binaries/
+    └── ovnkube                       # Matching binary
+```
+
+### Debugging with Delve
+
+Use the [Delve](https://github.com/go-delve/delve) debugger for post-mortem analysis.
+
+1. **Create a path substitution file** (`dlv.init`) to map build paths to your local
+   source checkout:
+
+   ```
+   config substitute-path /workspace/ovn-kubernetes/go-controller /path/to/your/ovn-kubernetes/go-controller
+   config substitute-path /usr/local/go /path/to/your/go/installation
+   ```
+
+   The build paths can be found by running `dlv core` without the init file and
+   using the `list` command; it will show the paths it is looking for.
+
+2. **Start the debugger**:
+
+   ```bash
+   dlv core ./binaries/ovnkube ./core.29132.ovnkube.ovn-worker.6 --init dlv.init
+   ```
+
+3. **Explore the crash**:
+
+   ```
+   (dlv) goroutines       # List all goroutines
+   (dlv) goroutine <id>   # Switch to a specific goroutine
+   (dlv) bt               # Show backtrace
+   (dlv) frame <n>        # Select stack frame
+   (dlv) list             # Show source code at current location
+   (dlv) locals           # Show local variables
+   (dlv) print <var>      # Print variable value
+   ```
+
+### Local Development
+
+To enable coredump collection in a local KIND cluster:
+
+```bash
+ENABLE_COREDUMPS=true ./contrib/kind.sh
+```
+
+To manually export logs with coredump binaries:
+
+```bash
+./contrib/export-kind-logs.sh /path/to/output
+```
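+
+If it is unclear whether a given coredump and binary belong together, a quick,
+optional sanity check with `file` can help before loading anything into Delve;
+the paths below follow the artifact layout shown above:
+
+```bash
+# Prints the ELF core metadata, including the program that produced the dump
+file ./core.29132.ovnkube.ovn-worker.6
+
+# Confirms the architecture and build details of the collected binary
+file ./binaries/ovnkube
+```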
diff --git a/docs/installation/launching-ovn-kubernetes-with-dpu.md b/docs/installation/launching-ovn-kubernetes-with-dpu.md
new file mode 100644
index 0000000000..b7ad6c3613
--- /dev/null
+++ b/docs/installation/launching-ovn-kubernetes-with-dpu.md
@@ -0,0 +1,189 @@
+# Launching OVN-Kubernetes in a DPU-Accelerated environment in interconnect mode
+
+## OVN K8s cluster setup
+
+OVN K8s CNI in a DPU-Accelerated environment is deployed using two Kubernetes clusters: one for the hosts and the other for the DPUs.
+
+DPUs in the DPU cluster watch the DPU Host cluster for K8s resources such as Pods, Namespaces, NetworkAttachmentDefinitions, Services, and Endpoints, and act on updates to those resources. Hence they require credentials to access the DPU Host cluster. Each DPU has a setting denoting the DPU Host to which it is associated.
+
+Refer to [DPU support](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/dpu-support.md) for more details on the setup.
+
+## SR-IOV settings on DPU Host
+
+Follow [OVS Acceleration with Kernel datapath](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/ovs-kernel.md) or [OVS Acceleration with DOCA datapath](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/features/hardware-offload/ovs-doca.md) to enable the Open vSwitch hardware offloading feature on DPU hosts.
+
+A single VF net-device or a group of VF net-devices (configured as an SR-IOV device plugin resource pool) needs to be set up separately to create the management port(s).
+
+## K8s Settings on DPU Host
+
+The following node labels must be set on the DPU Host prior to installing the OVN K8s CNI:
+
+```yaml
+k8s.ovn.org/dpu-host=
+k8s.ovn.org/zone-name="dpu-host node name"
+```
+
+## Launching OVN K8s DPU Host cluster using helm
+OVN K8s CNI can be deployed using the helm charts provided under [OVN K8s Helm Charts](https://github.com/ovn-kubernetes/ovn-kubernetes/tree/master/helm/ovn-kubernetes). Refer to [Launching OVN-Kubernetes using Helm Charts](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/master/docs/installation/launching-ovn-kubernetes-with-helm.md) for general instructions on using the helm charts and an explanation of the common values used in the various subcharts.
+
+For the DPU Host cluster, use values-single-node-zone.yaml and set the following fields as specified. The other fields in the file can be set as needed.
+
+```yaml
+tags:
+  ovnkube-node-dpu-host: true # The subchart is applied by default; removing this line also enables it
+  ovs-node: false # Disable the ovs-node subchart, as OVS is already provided by the corresponding DPU
+global:
+  enableOvnKubeIdentity: false # This feature is currently not supported for clusters with DPUs/DPU Hosts
+```
+
+The ovn-kubernetes image to be used in the containers should be provided in the image section:
+```yaml
+global:
+  image:
+    repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora
+    tag: master
+```
+
+Management port netdevice information should be provided in the values.yaml file under helm/ovn-kubernetes/charts/ovnkube-node-dpu-host. For example:
+```yaml
+nodeMgmtPortNetdev: "enp1s0f0v0" # Single VF net-device to be used for the management port, or
+mgmtPortVFResourceName: "mgmtport_vfs" # SR-IOV device plugin resource pool from which VF net-device(s) can be selected
+mgmtPortVFsCount: 2 # Number of VFs needed for management ports when using UDNs; depends on the number of primary UDNs
+```
+
+mgmtPortVFResourceName takes precedence over nodeMgmtPortNetdev if both are specified.
+If using UDNs, both mgmtPortVFResourceName and mgmtPortVFsCount should be specified.
+
+Launch OVN K8s using:
+```
+helm install ovn-kubernetes . -f values-single-node-zone.yaml
+```
+
+## Generating credentials for accessing this cluster from the DPU
+
+After deploying the CNI, create a secret in this cluster for the service account ovnkube-node by applying the following:
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: ovnkube-node-sa-for-dpu
+  namespace: ovn-kubernetes
+  annotations:
+    kubernetes.io/service-account.name: ovnkube-node
+type: kubernetes.io/service-account-token
+```
+
+Get the values of ca.crt and token; they will be used in the DPU cluster. The token must be base64-decoded, while the base64-encoded ca.crt is used as is, as shown in the example below.
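+
+One way to extract both values with kubectl, assuming access to the DPU Host cluster and the secret created above:
+
+```bash
+# Token: base64-decode before use in the DPU cluster configuration
+kubectl -n ovn-kubernetes get secret ovnkube-node-sa-for-dpu \
+  -o jsonpath='{.data.token}' | base64 -d
+
+# CA certificate: keep the base64-encoded value as is
+kubectl -n ovn-kubernetes get secret ovnkube-node-sa-for-dpu \
+  -o jsonpath='{.data.ca\.crt}'
+```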
+
+## K8s Settings on DPU
+
+The following node label is required on the DPUs prior to installing the OVN K8s CNI:
+```yaml
+k8s.ovn.org/dpu=
+```
+
+## OVS settings on DPU
+Some OVS settings are required on the DPU to enable hardware offloads, connect to the right DPU Host in the DPU Host cluster, and correctly steer traffic flows.
+
+Consider the following example OVS bridge configuration on the DPU, together with the network settings on the DPU and DPU Host.
+
+```
+ovs-vsctl show
+    Bridge brp0
+        fail_mode: standalone
+        Port pf0hpf
+            tag: 3
+            Interface pf0hpf
+                type: system
+        Port p0
+            Interface p0
+                type: system
+        Port vtep0
+            tag: 2
+            Interface vtep0
+                type: internal
+        Port brp0
+            Interface brp0
+                type: internal
+```
+
+```
+$ ip addr show dev brp0
+4: brp0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+    link/ether 52:54:00:a1:b2:c3 brd ff:ff:ff:ff:ff:ff
+    inet 192.0.2.10/24 brd 192.0.2.255 scope global brp0
+       valid_lft forever preferred_lft forever
+
+$ ip addr show dev vtep0
+5: vtep0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UNKNOWN group default qlen 1000
+    link/ether 52:54:00:d4:e5:f6 brd ff:ff:ff:ff:ff:ff
+    inet 198.51.100.10/24 brd 198.51.100.255 scope global vtep0
+       valid_lft forever preferred_lft forever
+```
+
+On the DPU Host with node name dpu-host, the IP address is set as follows:
+
+```
+$ ip addr show dev enp1s0f0
+2: enp1s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
+    link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
+    inet 203.0.113.10/24 brd 203.0.113.255 scope global enp1s0f0
+       valid_lft forever preferred_lft forever
+
+$ ip route show default
+default via 203.0.113.1 dev enp1s0f0 proto static
+```
+
+The router subnet is 203.0.113.0/24.
+
+The required OVS settings are listed below; the values are taken from the example above.
+
+```
+other_config:hw-offload=true - enable hardware offloading
+external_ids:host-k8s-nodename="dpu-host" - name of the DPU Host node
+external_ids:hostname="dpu" - OVN Chassis hostname of the DPU
+external_ids:ovn-encap-ip="198.51.100.10" - encapsulation IP of the DPU
+external_ids:ovn-encap-type="geneve" - supported encapsulation type
+external_ids:ovn-gw-interface="brp0" - interface on the DPU that serves as the gateway interface
+external_ids:ovn-gw-nexthop="203.0.113.1" - default gateway address of the DPU Host network
+external_ids:ovn-gw-router-subnet="203.0.113.0/24" - subnet to be used for the gateway router if the DPU is in a different subnet than the DPU Host network
+external_ids:ovn-gw-vlanid="3" - optional; set if the gateway VLAN is not the native VLAN
+```
+
+These settings can be applied with ovs-vsctl in a single transaction, as sketched below.
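+
+A minimal sketch using the example values above (adjust the names and addresses to your environment):
+
+```bash
+ovs-vsctl set Open_vSwitch . \
+  other_config:hw-offload=true \
+  external_ids:host-k8s-nodename="dpu-host" \
+  external_ids:hostname="dpu" \
+  external_ids:ovn-encap-ip="198.51.100.10" \
+  external_ids:ovn-encap-type="geneve" \
+  external_ids:ovn-gw-interface="brp0" \
+  external_ids:ovn-gw-nexthop="203.0.113.1" \
+  external_ids:ovn-gw-router-subnet="203.0.113.0/24" \
+  external_ids:ovn-gw-vlanid="3"
+```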
+
+## Launching OVN K8s DPU cluster
+
+Once the DPU Host cluster is deployed, the credentials to access that cluster are needed for the DPU cluster deployment, along with additional information about the OVN K8s configuration.
+
+Use values-single-node-zone-dpu.yaml for deploying the DPU cluster. Only the ovnkube-single-node-zone-dpu chart has to be installed, and it is enabled by default. The remaining charts are disabled by setting them to false under the tags section; this should not be changed.
+
+Set the following field as specified.
+```yaml
+global:
+  enableOvnKubeIdentity: false # This feature is currently not supported for clusters with DPUs/DPU Hosts
+```
+
+The following DPU Host cluster related information must be provided.
+```yaml
+global:
+  dpuHostClusterK8sAPIServer: "https://172.25.0.2:6443" # Endpoint of the DPU Host cluster's K8s API server
+  dpuHostClusterK8sToken: "" # DPU Host cluster's K8s access token (base64-decoded)
+  dpuHostClusterK8sCACertData: "" # DPU Host cluster's base64-encoded K8s CA certificate data
+  dpuHostClusterNetworkCIDR: "10.244.0.0/16/24" # DPU Host cluster's network CIDR
+  dpuHostClusterServiceCIDR: "10.96.0.0/16" # DPU Host cluster's service CIDR
+  mtu: "1400" # MTU of the network interface in K8s pods
+```
+
+The ovn-kubernetes image to be used in the containers should be provided in the dpuImage section. It must be built for the arm64 architecture.
+```yaml
+global:
+  dpuImage:
+    repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu
+    tag: master
+```
+
+The rest of the fields can be set as needed.
+
+Launch OVN K8s using:
+```
+helm install ovn-kubernetes . -f values-single-node-zone-dpu.yaml
+```
diff --git a/go-controller/pkg/allocator/id/allocator.go b/go-controller/pkg/allocator/id/allocator.go
index ef4900c7ce..20ed9798b4 100644
--- a/go-controller/pkg/allocator/id/allocator.go
+++ b/go-controller/pkg/allocator/id/allocator.go
@@ -16,7 +16,7 @@ const (
 type Allocator interface {
 	AllocateID(name string) (int, error)
 	ReserveID(name string, id int) error
-	ReleaseID(name string)
+	ReleaseID(name string) int
 	ForName(name string) NamedAllocator
 	GetID(name string) int
 }
@@ -25,7 +25,7 @@ type Allocator interface {
 type NamedAllocator interface {
 	AllocateID() (int, error)
 	ReserveID(int) error
-	ReleaseID()
+	ReleaseID() int
 }
 
 // idAllocator is used to allocate id for a resource and store the resource - id in a map
@@ -90,15 +90,18 @@ func (idAllocator *idAllocator) ReserveID(name string, id int) error {
 	return nil
 }
 
-// ReleaseID releases the id allocated for the resource 'name'
-func (idAllocator *idAllocator) ReleaseID(name string) {
+// ReleaseID releases the id allocated for the resource 'name'.
+// Returns the released id, or -1 if no id was allocated for that name.
+func (idAllocator *idAllocator) ReleaseID(name string) int { idAllocator.nameIdMap.LockKey(name) defer idAllocator.nameIdMap.UnlockKey(name) v, ok := idAllocator.nameIdMap.Load(name) if ok { idAllocator.idBitmap.Release(v) idAllocator.nameIdMap.Delete(name) + return v } + return invalidID } func (idAllocator *idAllocator) ForName(name string) NamedAllocator { @@ -129,8 +132,8 @@ func (allocator *namedAllocator) ReserveID(id int) error { return allocator.allocator.ReserveID(allocator.name, id) } -func (allocator *namedAllocator) ReleaseID() { - allocator.allocator.ReleaseID(allocator.name) +func (allocator *namedAllocator) ReleaseID() int { + return allocator.allocator.ReleaseID(allocator.name) } // idsAllocator is used to allocate multiple ids for a resource and store the resource - ids in a map diff --git a/go-controller/pkg/allocator/id/allocator_test.go b/go-controller/pkg/allocator/id/allocator_test.go index 79b783fbf8..2803d6ea81 100644 --- a/go-controller/pkg/allocator/id/allocator_test.go +++ b/go-controller/pkg/allocator/id/allocator_test.go @@ -5,6 +5,43 @@ import ( "testing" ) +func TestIDAllocator_ReleaseID(t *testing.T) { + t.Run("returns allocated ID when releasing", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + id, err := allocator.AllocateID("resource1") + if err != nil { + t.Fatalf("AllocateID() unexpected error: %v", err) + } + + got := allocator.ReleaseID("resource1") + if got != id { + t.Errorf("ReleaseID() = %d, want %d", got, id) + } + if allocator.GetID("resource1") != -1 { + t.Error("GetID() should return -1 after release") + } + }) + + t.Run("returns -1 when releasing already released resource", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + if _, err := allocator.AllocateID("resource1"); err != nil { + t.Fatalf("AllocateID() unexpected error: %v", err) + } + allocator.ReleaseID("resource1") + + if got := allocator.ReleaseID("resource1"); got != -1 { + t.Errorf("ReleaseID() = %d, want -1", got) + } + }) + + t.Run("returns -1 when releasing non-existent resource", func(t *testing.T) { + allocator := NewIDAllocator("test", 10) + if got := allocator.ReleaseID("nonexistent"); got != -1 { + t.Errorf("ReleaseID() = %d, want -1", got) + } + }) +} + func TestIDsAllocator(t *testing.T) { // create allocator with range [3, 8] allocator := newIDsAllocator("test", 6, 3) diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 41b1f6d3f9..8159c39da7 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -64,8 +64,9 @@ func (a *idAllocatorStub) ReserveID(int) error { return a.reserveIDError } -func (a *idAllocatorStub) ReleaseID() { +func (a *idAllocatorStub) ReleaseID() int { a.releasedID = true + return a.nextID } type persistentIPsStub struct { diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 7d3d1b44c8..12ce158aa0 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -23,6 +23,7 @@ import ( udntemplate "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" networkconnectclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/clusternetworkconnect/v1/apis/clientset/versioned" + vtepinformer 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" @@ -160,6 +161,10 @@ func NewClusterManager( } if util.IsNetworkSegmentationSupportEnabled() { + var vtepInformer vtepinformer.VTEPInformer + if util.IsEVPNEnabled() { + vtepInformer = wf.VTEPInformer() + } udnController := udncontroller.New( ovnClient.NetworkAttchDefClient, wf.NADInformer(), ovnClient.UserDefinedNetworkClient, @@ -168,6 +173,7 @@ func NewClusterManager( cm.networkManager.Interface(), wf.PodCoreInformer(), wf.NamespaceInformer(), + vtepInformer, cm.recorder, ) cm.userDefinedNetworkController = udnController diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index 7b0f2a2e39..ded7851375 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -531,7 +531,26 @@ func (eIPC *egressIPClusterController) getSortedEgressData() ([]*egressNode, map return assignableNodes, allAllocations } -func (eIPC *egressIPClusterController) initEgressNodeReachability(_ []interface{}) error { +func (eIPC *egressIPClusterController) initEgressNodeReachability(objs []interface{}) error { + for _, obj := range objs { + node := obj.(*corev1.Node) + if err := eIPC.initEgressIPAllocator(node); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + } + + // Before reconciling unassigned EgressIPs, ensure the allocator cache is populated + // with existing assignments from EgressIP statuses. This prevents duplicate IP + // assignments when two EgressIPs have the same IP in their specs but only one has + // it assigned in status (e.g., after control-plane restart or during initial sync). + egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + for _, egressIP := range egressIPs { + eIPC.ensureAllocatorEgressIPAssignments(egressIP) + } + go eIPC.checkEgressNodesReachability() return nil } @@ -990,11 +1009,6 @@ func (eIPC *egressIPClusterController) reconcileEgressIP(old, new *egressipv1.Eg statusToRemove = append(statusToRemove, status) ipsToRemove.Insert(status.EgressIP) } - // Adding the mark to annotations is bundled with status update in-order to minimise updates, cover the case where there is no update to status - // and mark annotation has been modified / removed. This should only occur for an update and the mark was previous set. - if ipsToAssign.Len() == 0 && ipsToRemove.Len() == 0 { - eIPC.ensureMark(old, new) - } if ipsToRemove.Len() > 0 { // The following is added as to ensure that we only add after having @@ -1825,10 +1839,21 @@ func generateStatusPatchOp(statusItems []egressipv1.EgressIPStatusItem) jsonPatc } } +// ensureAllocatorEgressIPAssignments adds EgressIP assignments to the allocator cache +// if the EgressIP has status items. This is critical to prevent duplicate IP assignments +// during restart when EgressIPs are processed in arbitrary order. +func (eIPC *egressIPClusterController) ensureAllocatorEgressIPAssignments(egressIP *egressipv1.EgressIP) { + if len(egressIP.Status.Items) > 0 { + eIPC.addAllocatorEgressIPAssignments(egressIP.Name, egressIP.Status.Items) + } +} + // syncEgressIPMarkAllocator iterates over all existing EgressIPs. 
It builds a mark cache of existing marks stored on each -// EgressIP annotation or allocates and adds a new mark to an EgressIP if it doesn't exist +// EgressIP annotation or allocates and adds a new mark to an EgressIP if it doesn't exist. func (eIPC *egressIPClusterController) syncEgressIPMarkAllocator(egressIPs []interface{}) error { - // reserve previously assigned marks + // Reserve previously assigned marks. Note: the allocator cache is pre-populated with + // existing assignments from EgressIP statuses in initEgressNodeReachability, which runs + // before this sync function. for _, object := range egressIPs { egressIP, ok := object.(*egressipv1.EgressIP) if !ok { @@ -1880,22 +1905,6 @@ func getEgressIPMarkAllocator() id.Allocator { return id.NewIDAllocator("eip_mark", eipMarkMax-eipMarkMin) } -// ensureMark ensures that if a mark was remove or changed value, then restore the mark. -func (eIPC *egressIPClusterController) ensureMark(old, new *egressipv1.EgressIP) { - // Adding the mark to annotations is bundled with status update in-order to minimise updates, cover the case where there is no update to status - // and mark annotation has been modified / removed. This should only occur for an update and the mark was previous set. - if old != nil && new != nil { - if util.IsEgressIPMarkSet(old.Annotations) && util.EgressIPMarkAnnotationChanged(old.Annotations, new.Annotations) { - mark, _, err := eIPC.getOrAllocMark(new.Name) - if err != nil { - klog.Errorf("Failed to restore EgressIP %s mark because unable to retrieve mark: %v", new.Name, err) - } else if err = eIPC.patchEgressIP(new.Name, generateMarkPatchOp(mark)); err != nil { - klog.Errorf("Failed to restore EgressIP %s mark because patching failed: %v", new.Name, err) - } - } - } -} - // getOrAllocMark allocates a new mark integer for name using round-robin strategy if none was already allocated for name otherwise // returns the previously allocated mark. // The mark is bounded by util.EgressIPMarkBase & util.EgressIPMarkMax inclusive. 
diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go index c89b8b58cf..593671f7b8 100644 --- a/go-controller/pkg/clustermanager/egressip_controller_test.go +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -22,7 +22,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/util/retry" utilnet "k8s.io/utils/net" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -3288,24 +3287,7 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { assignedMark, err := strconv.Atoi(assignedMarkStr) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to convert mark to string") - ginkgo.By("clear mark to cause update and expect restoration of mark") - gomega.Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { - eIP, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - if err != nil { - return err - } - eIP.Annotations = map[string]string{} - _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIP, metav1.UpdateOptions{}) - return err - })).ShouldNot(gomega.HaveOccurred(), "failed to update EgressIP object") - ginkgo.By("confirm the original mark is restored") - gomega.Eventually(getEgressIPAnnotationValue(eIP.Name)).ShouldNot(gomega.BeEmpty()) - assignedMarkStr, err = getEgressIPAnnotationValue(eIP.Name)() - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to get egress IP mark from annotations") - assignedMarkAfterUpdate, err := strconv.Atoi(assignedMarkStr) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to convert mark to string") - gomega.Expect(assignedMark).Should(gomega.Equal(assignedMarkAfterUpdate), "Mark should be identical if annotation is cleared") - ginkgo.By("confirm cache is unchanged") + ginkgo.By("confirm cache is set correctly") cachedMark, _, err := fakeClusterManagerOVN.eIPC.getOrAllocMark(eIP.Name) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Expect(cachedMark).Should(gomega.Equal(assignedMark), "EIP annotation and cache mark integer must be the same") @@ -4218,6 +4200,122 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + // This test validates that when two EgressIP CRs have the same IP in their specs, + // and one already has the IP assigned in status (from before restart), the sync + // function properly pre-populates the allocator cache to prevent duplicate assignment. + // This is a regression test for the bug where duplicate IPs were assigned during + // control-plane pod restart because the allocator cache wasn't populated from + // existing EgressIP statuses before processing individual ADD events. 
+ ginkgo.It("should not assign duplicate IP during restart when two EgressIPs have same IP in spec", func() { + app.Action = func(*cli.Context) error { + duplicateIP := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.51/24" + + node1 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + node2 := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\": [\"%s\",\"%s\"]}", v4NodeSubnet, v6NodeSubnet), + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + + // eIP1 has the IP assigned in status (simulating state from before restart) + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip-1"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{duplicateIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: duplicateIP, + Node: node1Name, + }, + }, + }, + } + + // eIP2 has the same IP in spec but NOT in status (unassigned, but was created + // with duplicate IP - which should have been rejected but wasn't due to a bug + // or manual API manipulation) + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip-2"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{duplicateIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &corev1.NodeList{Items: []corev1.Node{node1, node2}}, + // Both EgressIPs exist at startup - simulating restart scenario + &egressipv1.EgressIPList{Items: []egressipv1.EgressIP{eIP1, eIP2}}, + ) + + // Use WatchEgressNodes to properly initialize the allocator cache + // (simulating real startup behavior rather than manually setting up cache) + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // eIP1 should keep its assignment (the IP was already assigned) + gomega.Eventually(getEgressIPStatusLen("egressip-1")).Should(gomega.Equal(1)) + egressIPs1, nodes1 := getEgressIPStatus("egressip-1") + gomega.Expect(nodes1[0]).To(gomega.Equal(node1Name)) + gomega.Expect(egressIPs1[0]).To(gomega.Equal(duplicateIP)) + + // eIP2 should NOT get the duplicate IP assigned (not even to node2) - + // it should remain unassigned because initEgressNodeReachability pre-populated the + // cache with eIP1's assignment + gomega.Eventually(getEgressIPStatusLen("egressip-2")).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("AddEgressIP for IPv4", func() { diff --git a/go-controller/pkg/clustermanager/node/node_allocator.go b/go-controller/pkg/clustermanager/node/node_allocator.go index e31625b725..5f1ba6ccc5 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator.go +++ b/go-controller/pkg/clustermanager/node/node_allocator.go @@ -343,7 +343,6 @@ func (na *NodeAllocator) syncNodeNetworkAnnotations(node *corev1.Node) error { func (na *NodeAllocator) HandleDeleteNode(node *corev1.Node) error { if na.hasHybridOverlayAllocation() { na.releaseHybridOverlayNodeSubnet(node.Name) - return nil } if na.hasNodeSubnetAllocation() || na.hasHybridOverlayAllocationUnmanaged() { diff --git a/go-controller/pkg/clustermanager/node/node_allocator_test.go b/go-controller/pkg/clustermanager/node/node_allocator_test.go index 37fee60d64..acdbc137bb 100644 --- a/go-controller/pkg/clustermanager/node/node_allocator_test.go +++ b/go-controller/pkg/clustermanager/node/node_allocator_test.go @@ -12,7 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/listers/core/v1" + listersv1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" @@ -400,12 +400,12 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { } } -func newFakeNodeLister(nodes []*corev1.Node) v1.NodeLister { +func newFakeNodeLister(nodes []*corev1.Node) listersv1.NodeLister { indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) for _, node := range nodes { _ = indexer.Add(node) } - return v1.NewNodeLister(indexer) + return listersv1.NewNodeLister(indexer) } func TestController_CleanupStaleAnnotation(t *testing.T) { @@ -448,3 +448,105 @@ func TestController_CleanupStaleAnnotation(t *testing.T) { t.Fatalf("Expected annotation %s to be cleaned up, got %v", util.OVNNodeGRLRPAddrs, nodes.Items[0].Annotations) } } + +// TestNodeAllocator_HandleDeleteNode verifies that HandleDeleteNode correctly releases +// both standard cluster subnets and hybrid overlay subnets (if enabled) when a node is deleted. 
+func TestNodeAllocator_HandleDeleteNode(t *testing.T) { + origHybridEnabled := config.HybridOverlay.Enabled + origHybridSubnets := config.HybridOverlay.ClusterSubnets + origClusterSubnets := config.Default.ClusterSubnets + origNoHostSubnetNodes := config.Kubernetes.NoHostSubnetNodes + t.Cleanup(func() { + config.HybridOverlay.Enabled = origHybridEnabled + config.HybridOverlay.ClusterSubnets = origHybridSubnets + config.Default.ClusterSubnets = origClusterSubnets + config.Kubernetes.NoHostSubnetNodes = origNoHostSubnetNodes + }) + + config.HybridOverlay.Enabled = true + config.HybridOverlay.ClusterSubnets = []config.CIDRNetworkEntry{ + {CIDR: ovntest.MustParseIPNet("10.0.0.0/16"), HostSubnetLength: 24}, + } + + ranges, err := rangesFromStrings([]string{"172.16.0.0/16"}, []int{24}) + if err != nil { + t.Fatal(err) + } + config.Default.ClusterSubnets = ranges + + netInfo, err := util.NewNetInfo( + &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: types.DefaultNetworkName}, + }, + ) + if err != nil { + t.Fatal(err) + } + + na := &NodeAllocator{ + netInfo: netInfo, + clusterSubnetAllocator: NewSubnetAllocator(), + nodeLister: newFakeNodeLister([]*corev1.Node{}), + } + if na.hasHybridOverlayAllocation() { + na.hybridOverlaySubnetAllocator = NewSubnetAllocator() + } + + if !na.hasHybridOverlayAllocation() { + t.Fatal("Hybrid overlay allocation should be enabled given the test configuration") + } + + if err := na.Init(); err != nil { + t.Fatalf("Failed to initialize node allocator: %v", err) + } + + nodeName := "node-delete-test" + if !na.hasNodeSubnetAllocation() { + t.Fatal("Node subnet allocation should be enabled") + } + + allocated, _, err := na.allocateNodeSubnets(na.clusterSubnetAllocator, nodeName, nil, true, false) + if err != nil { + t.Fatalf("Failed to allocate subnet: %v", err) + } + if len(allocated) == 0 { + t.Fatal("No subnet allocated") + } + + v4used, _ := na.clusterSubnetAllocator.Usage() + if v4used != 1 { + t.Fatalf("Expected 1 allocated subnet, got %d", v4used) + } + + if na.hasHybridOverlayAllocation() { + if _, _, err := na.allocateNodeSubnets(na.hybridOverlaySubnetAllocator, nodeName, nil, true, false); err != nil { + t.Fatalf("Failed to allocate hybrid overlay subnet: %v", err) + } + hoUsed, _ := na.hybridOverlaySubnetAllocator.Usage() + if hoUsed != 1 { + t.Fatalf("Expected 1 allocated hybrid overlay subnet, got %d", hoUsed) + } + } + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + } + + if err := na.HandleDeleteNode(node); err != nil { + t.Fatalf("HandleDeleteNode failed: %v", err) + } + + v4usedAfter, _ := na.clusterSubnetAllocator.Usage() + if v4usedAfter != 0 { + t.Errorf("Subnet leak detected! Expected 0 allocated subnets, got %d", v4usedAfter) + } + + if na.hasHybridOverlayAllocation() { + hoUsedAfter, _ := na.hybridOverlaySubnetAllocator.Usage() + if hoUsedAfter != 0 { + t.Errorf("Hybrid overlay subnet leak detected! 
Expected 0 allocated subnets, got %d", hoUsedAfter) + } + } +} diff --git a/go-controller/pkg/clustermanager/pod/allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go index 987c401508..e11f287d8f 100644 --- a/go-controller/pkg/clustermanager/pod/allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -147,8 +147,9 @@ func (a *idAllocatorStub) ReserveID(string, int) error { panic("not implemented") // TODO: Implement } -func (a *idAllocatorStub) ReleaseID(string) { +func (a *idAllocatorStub) ReleaseID(string) int { a.released = true + return 0 } func (a *idAllocatorStub) ForName(string) id.NamedAllocator { diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller.go b/go-controller/pkg/clustermanager/routeadvertisements/controller.go index 463a5d0d67..75ce469089 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller.go @@ -50,6 +50,8 @@ import ( const ( generateName = "ovnk-generated-" fieldManager = "clustermanager-routeadvertisements-controller" + // evpnRawConfigPriority is set to an arbitrary value that still allows users to override EVPN config if needed. + evpnRawConfigPriority = 10 ) var ( @@ -324,6 +326,33 @@ type selectedNetworks struct { prefixLength map[string]uint32 // networkType is a map of selected network to their topology networkTopology map[string]string + // macVRFConfigs is an ordered list of MAC-VRF EVPN configurations for selected networks + macVRFConfigs []*vrfConfig + // ipVRFConfigs is an ordered list of IP-VRF EVPN configurations for selected networks + ipVRFConfigs []*ipVRFConfig + // networkTransport is a map of selected network to their transport mode + networkTransport map[string]string +} + +// vrfConfig holds base VRF EVPN configuration for a network +type vrfConfig struct { + // VNI is the VXLAN Network Identifier + VNI int32 + // RouteTarget is the BGP route target, empty means use FRR defaults + RouteTarget string +} + +// ipVRFConfig holds IP-VRF EVPN configuration for a network +type ipVRFConfig struct { + vrfConfig + // NetworkName is the name of the network this config belongs to + NetworkName string + // VRFName is the Linux VRF name + VRFName string + // HasIPv4 indicates if the network has IPv4 subnets + HasIPv4 bool + // HasIPv6 indicates if the network has IPv6 subnets + HasIPv6 bool } // generateFRRConfigurations generates FRRConfigurations for the route @@ -351,10 +380,11 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) // validate and gather information about the networks networkSet := sets.New[string]() selectedNetworks := &selectedNetworks{ - networkVRFs: map[string]string{}, - networkSubnets: map[string][]string{}, - prefixLength: map[string]uint32{}, - networkTopology: map[string]string{}, + networkVRFs: map[string]string{}, + networkSubnets: map[string][]string{}, + prefixLength: map[string]uint32{}, + networkTopology: map[string]string{}, + networkTransport: map[string]string{}, } for _, nad := range nads { networkName := util.GetAnnotatedNetworkName(nad) @@ -385,6 +415,43 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) selectedNetworks.vrfs = append(selectedNetworks.vrfs, vrf) selectedNetworks.networkVRFs[vrf] = networkName selectedNetworks.networkTopology[networkName] = network.TopologyType() + selectedNetworks.networkTransport[networkName] = network.Transport() + + // MAC-VRF configuration + if macVNI := 
network.EVPNMACVRFVNI(); macVNI > 0 { + selectedNetworks.macVRFConfigs = append(selectedNetworks.macVRFConfigs, &vrfConfig{ + VNI: macVNI, + RouteTarget: network.EVPNMACVRFRouteTarget(), + }) + } + + // IP-VRF configuration + if ipVNI := network.EVPNIPVRFVNI(); ipVNI > 0 { + // Compute IP families from network subnets + hasIPv4, hasIPv6 := false, false + for _, subnet := range network.Subnets() { + if subnet.CIDR.IP.To4() == nil { + hasIPv6 = true + } else { + hasIPv4 = true + } + } + selectedNetworks.ipVRFConfigs = append(selectedNetworks.ipVRFConfigs, &ipVRFConfig{ + vrfConfig: vrfConfig{ + VNI: ipVNI, + RouteTarget: network.EVPNIPVRFRouteTarget(), + }, + NetworkName: networkName, + VRFName: vrf, + HasIPv4: hasIPv4, + HasIPv6: hasIPv6, + }) + } + hasEVPNConfig := network.EVPNMACVRFVNI() > 0 || network.EVPNIPVRFVNI() > 0 + if hasEVPNConfig && ra.Spec.TargetVRF != "auto" && ra.Spec.TargetVRF != vrf { + return nil, nil, fmt.Errorf("%w: EVPN network %q with VRF %q requires TargetVRF to be 'auto' or %q, got %q", + errConfig, networkName, vrf, vrf, ra.Spec.TargetVRF) + } // TODO check overlaps? for _, cidr := range network.Subnets() { subnet := cidr.CIDR.String() @@ -399,6 +466,8 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) // ordered slices.Sort(selectedNetworks.vrfs) slices.Sort(selectedNetworks.subnets) + slices.SortFunc(selectedNetworks.macVRFConfigs, func(a, b *vrfConfig) int { return int(a.VNI - b.VNI) }) + slices.SortFunc(selectedNetworks.ipVRFConfigs, func(a, b *ipVRFConfig) int { return int(a.VNI - b.VNI) }) selectedNetworks.networks = sets.List(networkSet) // gather selected nodes @@ -435,6 +504,8 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) if len(frrConfigs) == 0 { return nil, nil, fmt.Errorf("%w: no FRRConfigurations selected", errPending) } + + frrRouterVRFs := sets.New[string]() for _, frrConfig := range frrConfigs { if strings.HasPrefix(frrConfig.Name, generateName) { klog.V(4).Infof("Skipping FRRConfiguration %q selected by RouteAdvertisements %q as it was generated by ovn-kubernetes", frrConfig.Name, ra.Name) @@ -455,6 +526,27 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) } nodeToFRRConfig[node.Name] = append(nodeToFRRConfig[node.Name], frrConfig) } + for _, router := range frrConfig.Spec.BGP.Routers { + frrRouterVRFs.Insert(router.VRF) + } + } + + // Validate EVPN configuration requirements + hasEVPNConfig := len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 + if hasEVPNConfig && !util.IsEVPNEnabled() { + return nil, nil, fmt.Errorf("%w: EVPN networks selected but EVPN feature is not enabled", errConfig) + } + // Require a router with default VRF for any EVPN configuration, since the + // global EVPN section with advertise-all-vni is required for EVPN to work properly. + if hasEVPNConfig && !frrRouterVRFs.Has("") { + return nil, nil, fmt.Errorf("%w: EVPN requires a router with default VRF but none were found in selected FRRConfigurations", errConfig) + } + // Validate IP-VRF networks: each needs either an existing VRF router or + // the default VRF router to create one from. 
+ for _, cfg := range selectedNetworks.ipVRFConfigs { + if !frrRouterVRFs.Has(cfg.VRFName) && !frrRouterVRFs.Has("") { + return nil, nil, fmt.Errorf("%w: IP-VRF EVPN network %q requires a router with VRF %q or a router with default VRF, but none were found in selected FRRConfigurations", errConfig, cfg.NetworkName, cfg.VRFName) + } } // helper to gather host subnets and cache during reconcile @@ -562,6 +654,7 @@ func (c *Controller) generateFRRConfigurations(ra *ratypes.RouteAdvertisements) nodeName, selectedNetworks, matchedNetworks, + frrRouterVRFs, ) if err != nil { return nil, nil, err @@ -591,8 +684,9 @@ func (c *Controller) generateFRRConfiguration( nodeName string, selectedNetworks *selectedNetworks, matchedNetworks sets.Set[string], + frrRouterVRFs sets.Set[string], ) (*frrtypes.FRRConfiguration, error) { - routers := []frrtypes.Router{} + var routers []frrtypes.Router // go over the source routers for i, router := range source.Spec.BGP.Routers { @@ -670,6 +764,32 @@ func (c *Controller) generateFRRConfiguration( Prefixes: advertisePrefixes, }, } + + // For no-overlay networks, add routes to pod subnets to the accepted routes list + // frr-k8s will merge the prefixes from both the generated and the base FRRConfiguration + if selectedNetworks.networkTransport[matchedNetwork] == types.NetworkTransportNoOverlay { + // Get the pod subnets for this network (the network subnets, not host subnets) + podSubnets := selectedNetworks.networkSubnets[matchedNetwork] + if len(podSubnets) > 0 { + // Filter pod subnets by IP family to match the neighbor + filteredPodSubnets := util.MatchAllIPNetsStringFamily(isIPV6, podSubnets) + if len(filteredPodSubnets) > 0 { + neighbor.ToReceive = frrtypes.Receive{ + Allowed: frrtypes.AllowedInPrefixes{ + Mode: frrtypes.AllowRestricted, + }, + } + for _, subnet := range filteredPodSubnets { + neighbor.ToReceive.Allowed.Prefixes = append(neighbor.ToReceive.Allowed.Prefixes, frrtypes.PrefixSelector{ + Prefix: subnet, + LE: selectedNetworks.prefixLength[subnet], + GE: selectedNetworks.prefixLength[subnet], + }) + } + } + } + } + targetRouter.Neighbors = append(targetRouter.Neighbors, neighbor) } if len(targetRouter.Neighbors) == 0 { @@ -720,11 +840,65 @@ func (c *Controller) generateFRRConfiguration( routers = append(routers, importRouter) } } - if len(routers) == 0 { - // we ended up with no routers, bail out - return nil, nil + var globalRouterASN uint32 + var neighbors []string + vrfASNs := map[string]uint32{} + + if len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 { + // Look for global router in the source FRRConfiguration, not in the filtered routers + for _, router := range source.Spec.BGP.Routers { + if router.VRF == "" { // default VRF + globalRouterASN = router.ASN + for _, neighbor := range router.Neighbors { + neighbors = append(neighbors, neighbor.Address) + } + break + } + } + } + + // For IP-VRF: Find or create routers for each EVPN network's VRF. + // IP-VRF routers don't need neighbors for EVPN (they use the global router's neighbors). 
+ for _, cfg := range selectedNetworks.ipVRFConfigs { + if frrRouterVRFs.Has(cfg.VRFName) { + // VRF router exists somewhere - check if it's in the current source + for _, router := range source.Spec.BGP.Routers { + if router.VRF == cfg.VRFName { + vrfASNs[cfg.VRFName] = router.ASN + if !slices.ContainsFunc(routers, func(r frrtypes.Router) bool { return r.VRF == cfg.VRFName }) { + routers = append(routers, frrtypes.Router{ + ASN: router.ASN, + VRF: cfg.VRFName, + Prefixes: selectedNetworks.hostNetworkSubnets[cfg.NetworkName], + }) + } + break + } + } + // If not in current source, another source will handle it + } else if globalRouterASN > 0 { + // VRF router doesn't exist anywhere - create with global ASN + klog.Infof("Creating router for EVPN network %q VRF %q with ASN=%d, prefixes=%v", + cfg.NetworkName, cfg.VRFName, globalRouterASN, selectedNetworks.hostNetworkSubnets[cfg.NetworkName]) + matchedNetworks.Insert(cfg.NetworkName) + vrfASNs[cfg.VRFName] = globalRouterASN + routers = append(routers, frrtypes.Router{ + ASN: globalRouterASN, + VRF: cfg.VRFName, + Prefixes: selectedNetworks.hostNetworkSubnets[cfg.NetworkName], + }) + } } + // Check if we have anything to generate: routers or EVPN raw config. + // EVPN raw config is generated when we have: + // - A global router (globalRouterASN > 0 && len(neighbors) > 0) for the global EVPN section + // - IP-VRF configs for VRF VNI and VRF EVPN sections + hasEVPNRawConfig := (globalRouterASN > 0 && len(neighbors) > 0) || len(selectedNetworks.ipVRFConfigs) > 0 + if len(routers) == 0 && !hasEVPNRawConfig { + // we ended up with no routers and no EVPN raw config to generate, bail out + return nil, nil + } new := &frrtypes.FRRConfiguration{} new.GenerateName = generateName new.Namespace = source.Namespace @@ -748,6 +922,18 @@ func (c *Controller) generateFRRConfiguration( }, } + // Generate EVPN raw config for the EVPN-specific parts. 
+ // TODO: once frr-k8s provides a typed EVPN API, we can use that instead of raw config + if len(selectedNetworks.macVRFConfigs) > 0 || len(selectedNetworks.ipVRFConfigs) > 0 { + rawConfig := generateEVPNRawConfig(selectedNetworks, globalRouterASN, neighbors, vrfASNs) + if rawConfig != "" { + new.Spec.Raw = frrtypes.RawConfig{ + Priority: evpnRawConfigPriority, + Config: rawConfig, + } + } + } + return new, nil } diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index a6b8e8b664..1bad4f1ad5 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -2,6 +2,7 @@ package routeadvertisements import ( "context" + "encoding/json" "fmt" "strings" "sync" @@ -148,11 +149,18 @@ func (tn testNode) Node() *corev1.Node { } } +type testPrefixSelector struct { + Prefix string + LE uint32 + GE uint32 +} + type testNeighbor struct { ASN uint32 Address string DisableMP *bool Advertise []string + Receive []testPrefixSelector } func (tn testNeighbor) Neighbor() frrapi.Neighbor { @@ -170,6 +178,22 @@ func (tn testNeighbor) Neighbor() frrapi.Neighbor { if tn.DisableMP != nil { n.DisableMP = *tn.DisableMP } + if len(tn.Receive) > 0 { + prefixSelectors := make([]frrapi.PrefixSelector, 0, len(tn.Receive)) + for _, ps := range tn.Receive { + prefixSelectors = append(prefixSelectors, frrapi.PrefixSelector{ + Prefix: ps.Prefix, + LE: ps.LE, + GE: ps.GE, + }) + } + n.ToReceive = frrapi.Receive{ + Allowed: frrapi.AllowedInPrefixes{ + Mode: frrapi.AllowRestricted, + Prefixes: prefixSelectors, + }, + } + } return n } @@ -198,14 +222,16 @@ func (tr testRouter) Router() frrapi.Router { } type testFRRConfig struct { - Name string - Namespace string - Generation int - Labels map[string]string - Annotations map[string]string - Routers []*testRouter - NodeSelector map[string]string - OwnUpdate bool + Name string + Namespace string + Generation int + Labels map[string]string + Annotations map[string]string + Routers []*testRouter + NodeSelector map[string]string + OwnUpdate bool + RawConfig string + RawConfigPriority int } func (tf testFRRConfig) FRRConfiguration() *frrapi.FRRConfiguration { @@ -226,6 +252,10 @@ func (tf testFRRConfig) FRRConfiguration() *frrapi.FRRConfiguration { for _, r := range tf.Routers { f.Spec.BGP.Routers = append(f.Spec.BGP.Routers, r.Router()) } + if tf.RawConfig != "" { + f.Spec.Raw.Config = tf.RawConfig + f.Spec.Raw.Priority = tf.RawConfigPriority + } if tf.OwnUpdate { f.ManagedFields = append(f.ManagedFields, metav1.ManagedFieldsEntry{ Manager: fieldManager, @@ -264,15 +294,19 @@ func (te testEIP) EgressIP() *eiptypes.EgressIP { } type testNAD struct { - Name string - Namespace string - Network string - Subnet string - Labels map[string]string - Annotations map[string]string - IsSecondary bool - Topology string - OwnUpdate bool + Name string + Namespace string + Network string + Subnet string + Labels map[string]string + Annotations map[string]string + IsSecondary bool + Topology string + OwnUpdate bool + EVPNMACVRFVNI int32 + EVPNMACVRFRouteTarget string + EVPNIPVRFVNI int32 + EVPNIPVRFRouteTarget string } func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { @@ -295,27 +329,52 @@ func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { ) nad.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ownerRef} } - topology := tn.Topology - switch { - case tn.IsSecondary: - 
nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\", \"topology\": \"%s\", \"netAttachDefName\": \"%s\", \"subnets\": \"%s\"}", - tn.Network, - config.CNI.Plugin, - topology, - tn.Namespace+"/"+tn.Name, - tn.Subnet, - ) - case tn.Topology != "": - nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\", \"topology\": \"%s\", \"netAttachDefName\": \"%s\", \"role\": \"primary\", \"subnets\": \"%s\"}", - tn.Network, - config.CNI.Plugin, - topology, - tn.Namespace+"/"+tn.Name, - tn.Subnet, - ) - default: - nad.Spec.Config = fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"%s\", \"type\": \"%s\"}", tn.Network, config.CNI.Plugin) + + // Build the config as a map to properly marshal EVPN config + cniConfig := map[string]interface{}{ + "cniVersion": "0.4.0", + "name": tn.Network, + "type": config.CNI.Plugin, + "netAttachDefName": tn.Namespace + "/" + tn.Name, + } + + if tn.Topology != "" { + cniConfig["topology"] = tn.Topology + } + if tn.Subnet != "" { + cniConfig["subnets"] = tn.Subnet + } + if tn.Topology != "" && !tn.IsSecondary { + cniConfig["role"] = "primary" + } + + // Add EVPN configuration if present + if tn.EVPNMACVRFVNI > 0 || tn.EVPNIPVRFVNI > 0 { + evpnConfig := map[string]interface{}{} + if tn.EVPNMACVRFVNI > 0 { + macvrf := map[string]interface{}{ + "vni": tn.EVPNMACVRFVNI, + } + if tn.EVPNMACVRFRouteTarget != "" { + macvrf["routeTarget"] = tn.EVPNMACVRFRouteTarget + } + evpnConfig["macVRF"] = macvrf + } + if tn.EVPNIPVRFVNI > 0 { + ipvrf := map[string]interface{}{ + "vni": tn.EVPNIPVRFVNI, + } + if tn.EVPNIPVRFRouteTarget != "" { + ipvrf["routeTarget"] = tn.EVPNIPVRFRouteTarget + } + evpnConfig["ipVRF"] = ipvrf + } + cniConfig["evpn"] = evpnConfig } + + configBytes, _ := json.Marshal(cniConfig) + nad.Spec.Config = string(configBytes) + if tn.OwnUpdate { nad.ManagedFields = append(nad.ManagedFields, metav1.ManagedFieldsEntry{ Manager: fieldManager, @@ -372,6 +431,7 @@ func TestController_reconcile(t *testing.T) { namespaces []*testNamespace eips []*testEIP reconcile string + transport string wantErr bool expectAcceptedStatus metav1.ConditionStatus expectFRRConfigs []*testFRRConfig @@ -781,6 +841,37 @@ func TestController_reconcile(t *testing.T) { reconcile: "ra", expectAcceptedStatus: metav1.ConditionTrue, }, + { + name: "reconciles pod RouteAdvertisement for default network in no-overlay mode with ToReceive routes", + ra: &testRA{Name: "ra", AdvertisePods: true, SelectsDefault: true}, + transport: types.NetworkTransportNoOverlay, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.1.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100", Receive: []testPrefixSelector{{Prefix: "1.2.0.0/16"}}}, + }}, + }, + }, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.0.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100", Advertise: []string{"1.1.0.0/24"}, Receive: []testPrefixSelector{{Prefix: "1.1.0.0/16", LE: 24, GE: 24}}}, + }}, + }}, + }, + 
expectNADAnnotations: map[string]map[string]string{"default": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, { name: "fails to reconcile a secondary network", ra: &testRA{Name: "ra", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, @@ -942,6 +1033,338 @@ func TestController_reconcile(t *testing.T) { reconcile: "ra", expectAcceptedStatus: metav1.ConditionFalse, }, + { + name: "fails to reconcile EVPN-enabled network to default VRF", + ra: &testRA{Name: "ra", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 1, Prefixes: []string{"1.1.1.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 1, Address: "1.0.0.100"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "evpn-net", Namespace: "test", Network: util.GenerateCUDNNetworkName("evpn-net"), + Topology: "layer2", Subnet: "1.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "reconciles EVPN MAC-VRF l2 network with a specific target VRF without a VRF router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000, EVPNMACVRFRouteTarget: "65000:1000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! 
+`, + }, + }, + expectNADAnnotations: map[string]map[string]string{"red": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "reconciles EVPN IP-VRF network with auto target and creates a router", + ra: &testRA{Name: "ra", TargetVRF: "auto", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "blue", Namespace: "blue", Network: util.GenerateCUDNNetworkName("blue"), + Topology: "layer3", Subnet: "10.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 2000, EVPNIPVRFRouteTarget: "65000:2000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_blue\":\"10.2.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfig/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + Routers: []*testRouter{ + {ASN: 65000, VRF: "blue", Prefixes: []string{"10.2.1.0/24"}}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"blue": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "reconciles EVPN IP-VRF with router ASN from another FRRConfiguration", + ra: &testRA{Name: "ra", TargetVRF: "auto", AdvertisePods: true, SelectsDefault: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfigGlobal", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + { + Name: "frrConfigVRF", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65100, VRF: "blue"}, + }, + }, + }, + nads: []*testNAD{ + {Name: "blue", Namespace: "blue", Network: util.GenerateCUDNNetworkName("blue"), + Topology: "layer3", Subnet: "10.2.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 2000, EVPNIPVRFRouteTarget: "65000:2000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\",\"cluster_udn_blue\":\"10.2.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigGlobal/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! 
+`, + Routers: []*testRouter{ + {ASN: 65000, Prefixes: []string{"1.1.0.0/24"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1", Advertise: []string{"1.1.0.0/24"}}, + }}, + }, + }, + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigVRF/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `vrf blue + vni 2000 +exit-vrf +! +router bgp 65100 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + Routers: []*testRouter{ + {ASN: 65100, VRF: "blue", Prefixes: []string{"10.2.1.0/24"}}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"blue": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, + { + name: "fails to reconcile MACVRF EVPN without global router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "fails to reconcile IPVRF EVPN without global router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNIPVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "fails to reconcile EVPN with global router but no neighbors", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfig", + Namespace: frrNamespace, + Routers: []*testRouter{ + {ASN: 65000}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"cluster_udn_red\":\"10.1.1.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionFalse, + }, + { + name: "reconciles EVPN when global router is in a different FRRConfiguration than VRF router", + ra: &testRA{Name: "ra", TargetVRF: "red", AdvertisePods: true, NetworkSelector: map[string]string{"selected": "true"}}, + frrConfigs: []*testFRRConfig{ + { + Name: "frrConfigGlobal", + Namespace: frrNamespace, + 
Routers: []*testRouter{ + // Global router with neighbors - provides ASN and neighbors for EVPN + {ASN: 65000, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + { + Name: "frrConfigVRF", + Namespace: frrNamespace, + Routers: []*testRouter{ + // VRF-specific router - matches the target VRF + {ASN: 65000, VRF: "red", Prefixes: []string{"10.1.0.0/16"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1"}, + }}, + }, + }, + }, + nads: []*testNAD{ + {Name: "red", Namespace: "red", Network: util.GenerateCUDNNetworkName("red"), + Topology: "layer2", Subnet: "10.1.0.0/16", Labels: map[string]string{"selected": "true"}, + EVPNMACVRFVNI: 1000, EVPNMACVRFRouteTarget: "65000:1000"}, + }, + nodes: []*testNode{{Name: "node", SubnetsAnnotation: "{\"default\":\"1.1.0.0/24\"}"}}, + reconcile: "ra", + expectAcceptedStatus: metav1.ConditionTrue, + expectFRRConfigs: []*testFRRConfig{ + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigGlobal/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + RawConfigPriority: 10, + RawConfig: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +`, + }, + { + Labels: map[string]string{types.OvnRouteAdvertisementsKey: "ra"}, + Annotations: map[string]string{types.OvnRouteAdvertisementsKey: "ra/frrConfigVRF/node"}, + NodeSelector: map[string]string{"kubernetes.io/hostname": "node"}, + Routers: []*testRouter{ + {ASN: 65000, VRF: "red", Prefixes: []string{"10.1.0.0/16"}, Neighbors: []*testNeighbor{ + {ASN: 65000, Address: "192.168.1.1", Advertise: []string{"10.1.0.0/16"}}, + }}, + }, + }, + }, + expectNADAnnotations: map[string]map[string]string{"red": {types.OvnRouteAdvertisementsKey: "[\"ra\"]"}}, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -960,9 +1383,11 @@ func TestController_reconcile(t *testing.T) { HostSubnetLength: 64, }, } + config.Default.Transport = tt.transport config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableRouteAdvertisements = true config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EnableEVPN = true fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() addGenerateNameReactor[*frrfake.Clientset](fakeClientset.FRRClient) @@ -1315,6 +1740,7 @@ func TestUpdates(t *testing.T) { config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableRouteAdvertisements = true config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EnableEVPN = true fakeClientset := util.GetOVNClientset().GetClusterManagerClientset() diff --git a/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go new file mode 100644 index 0000000000..7e3fc9bd47 --- /dev/null +++ b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig.go @@ -0,0 +1,140 @@ +package routeadvertisements + +import ( + "fmt" + "strings" +) + +// generateEVPNRawConfig generates raw FRR configuration for EVPN. +// If asn/neighbors aren't provided the related sections are skipped. 
+//
+// Generated config structure:
+//
+// router bgp <asn>             <- genGlobalEVPNSection
+//  address-family l2vpn evpn
+//   neighbor <addr> activate
+//   advertise-all-vni
+//   vni <vni>                  <- (one per MAC-VRF with RT, section only added when MAC-VRF RT is set)
+//    route-target import <rt>
+//    route-target export <rt>
+//   exit-vni
+//  exit-address-family
+// exit
+// !
+// vrf <name>                   <- genVRFVNISection (one per IP-VRF)
+//  vni <vni>
+// exit-vrf
+// !
+// router bgp <asn> vrf <name>  <- genVRFEVPNSection (one per IP-VRF)
+//  address-family l2vpn evpn
+//   advertise ipv4 unicast
+//   advertise ipv6 unicast
+//   route-target import <rt>
+//   route-target export <rt>
+//  exit-address-family
+// exit
+// !
+func generateEVPNRawConfig(selected *selectedNetworks, asn uint32, neighbors []string, vrfASNs map[string]uint32) string {
+	var buf strings.Builder
+
+	if asn > 0 && len(neighbors) > 0 {
+		buf.WriteString(genGlobalEVPNSection(asn, neighbors, selected.macVRFConfigs))
+	}
+	for _, cfg := range selected.ipVRFConfigs {
+		buf.WriteString(genVRFVNISection(cfg))
+	}
+	// Generate VRF-specific EVPN sections using each config's ASN
+	for _, cfg := range selected.ipVRFConfigs {
+		if vrfASN := vrfASNs[cfg.VRFName]; vrfASN > 0 {
+			buf.WriteString(genVRFEVPNSection(vrfASN, cfg))
+		}
+	}
+	return buf.String()
+}
+
+// genVRFVNISection generates VRF-to-VNI mapping.
+//
+// vrf <name>
+//  vni <vni>
+// exit-vrf
+// !
+func genVRFVNISection(cfg *ipVRFConfig) string {
+	return fmt.Sprintf(`vrf %s
+ vni %d
+exit-vrf
+!
+`, cfg.VRFName, cfg.VNI)
+}
+
+// genGlobalEVPNSection generates the global router's EVPN address-family.
+//
+// router bgp <asn>
+//  address-family l2vpn evpn
+//   neighbor <addr> activate
+//   advertise-all-vni
+//   vni <vni>                  <- (section only added when MAC-VRF RT is set)
+//    route-target import <rt>
+//    route-target export <rt>
+//   exit-vni
+//  exit-address-family
+// exit
+// !
+func genGlobalEVPNSection(asn uint32, neighbors []string, macVRFs []*vrfConfig) string {
+	var buf strings.Builder
+
+	fmt.Fprintf(&buf, "router bgp %d\n", asn)
+	buf.WriteString(" address-family l2vpn evpn\n")
+
+	for _, neighbor := range neighbors {
+		fmt.Fprintf(&buf, " neighbor %s activate\n", neighbor)
+	}
+	buf.WriteString(" advertise-all-vni\n")
+
+	for _, cfg := range macVRFs {
+		if cfg.RouteTarget == "" {
+			continue
+		}
+		fmt.Fprintf(&buf, " vni %d\n", cfg.VNI)
+		fmt.Fprintf(&buf, " route-target import %s\n", cfg.RouteTarget)
+		fmt.Fprintf(&buf, " route-target export %s\n", cfg.RouteTarget)
+		buf.WriteString(" exit-vni\n")
+	}
+
+	buf.WriteString(" exit-address-family\n")
+	buf.WriteString("exit\n!\n")
+
+	return buf.String()
+}
+
+// genVRFEVPNSection generates a VRF router's EVPN address-family.
+//
+// router bgp 65000 vrf red
+//  address-family l2vpn evpn
+//   advertise ipv4 unicast
+//   advertise ipv6 unicast
+//   route-target import 65000:100
+//   route-target export 65000:100
+//  exit-address-family
+// exit
+// !
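+//
+// A hedged usage sketch (values hypothetical, mirroring the block above):
+//
+//	cfg := &ipVRFConfig{vrfConfig: vrfConfig{RouteTarget: "65000:100"}, VRFName: "red", HasIPv4: true, HasIPv6: true}
+//	frag := genVRFEVPNSection(65000, cfg) // yields exactly the block above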
+func genVRFEVPNSection(asn uint32, cfg *ipVRFConfig) string { + var buf strings.Builder + fmt.Fprintf(&buf, "router bgp %d vrf %s\n", asn, cfg.VRFName) + buf.WriteString(" address-family l2vpn evpn\n") + + if cfg.HasIPv4 { + buf.WriteString(" advertise ipv4 unicast\n") + } + if cfg.HasIPv6 { + buf.WriteString(" advertise ipv6 unicast\n") + } + if cfg.RouteTarget != "" { + fmt.Fprintf(&buf, " route-target import %s\n", cfg.RouteTarget) + fmt.Fprintf(&buf, " route-target export %s\n", cfg.RouteTarget) + } + + buf.WriteString(" exit-address-family\n") + buf.WriteString("exit\n!\n") + + return buf.String() +} diff --git a/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go new file mode 100644 index 0000000000..1b1460441c --- /dev/null +++ b/go-controller/pkg/clustermanager/routeadvertisements/evpn_rawconfig_test.go @@ -0,0 +1,183 @@ +package routeadvertisements + +import ( + "testing" +) + +func TestGenerateEVPNRawConfig(t *testing.T) { + tests := []struct { + name string + selected *selectedNetworks + asn uint32 + neighbors []string + want string + }{ + { + name: "MAC-VRF without route target", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000}, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +`, + }, + { + name: "MAC-VRF with route target", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000, RouteTarget: "65000:1000"}, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +`, + }, + { + name: "IP-VRF IPv6", + selected: &selectedNetworks{ + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv6: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv6 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + }, + { + name: "IP-VRF dual stack", + selected: &selectedNetworks{ + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv4: true, + HasIPv6: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + advertise-all-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + advertise ipv6 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! 
+`, + }, + { + name: "MAC-VRF and IP-VRF combined", + selected: &selectedNetworks{ + macVRFConfigs: []*vrfConfig{ + {VNI: 1000, RouteTarget: "65000:1000"}, + }, + ipVRFConfigs: []*ipVRFConfig{ + { + vrfConfig: vrfConfig{VNI: 2000, RouteTarget: "65000:2000"}, + VRFName: "blue", + HasIPv4: true, + }, + }, + }, + asn: 65000, + neighbors: []string{"192.168.1.1", "192.168.1.2"}, + want: `router bgp 65000 + address-family l2vpn evpn + neighbor 192.168.1.1 activate + neighbor 192.168.1.2 activate + advertise-all-vni + vni 1000 + route-target import 65000:1000 + route-target export 65000:1000 + exit-vni + exit-address-family +exit +! +vrf blue + vni 2000 +exit-vrf +! +router bgp 65000 vrf blue + address-family l2vpn evpn + advertise ipv4 unicast + route-target import 65000:2000 + route-target export 65000:2000 + exit-address-family +exit +! +`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vrfASNs := map[string]uint32{} + for _, cfg := range tt.selected.ipVRFConfigs { + if cfg.VRFName != "" { + vrfASNs[cfg.VRFName] = tt.asn + } + } + got := generateEVPNRawConfig(tt.selected, tt.asn, tt.neighbors, vrfASNs) + if got != tt.want { + t.Errorf("generateEVPNRawConfig() mismatch\nGot:\n%s\nWant:\n%s", got, tt.want) + } + }) + } +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index 68409eba9b..f1ffd0dcc3 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -18,6 +18,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1" corev1informer "k8s.io/client-go/informers/core/v1" @@ -29,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/notifier" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" @@ -38,14 +40,42 @@ import ( userdefinednetworkscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/scheme" userdefinednetworkinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions/userdefinednetwork/v1" userdefinednetworklister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/listers/userdefinednetwork/v1" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" + vteplister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/listers/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) -const conditionTypeNetworkCreated = "NetworkCreated" +const ( + conditionTypeNetworkCreated = "NetworkCreated" + + // Condition reasons + reasonNADCreated = "NetworkAttachmentDefinitionCreated" + reasonSyncError = "SyncError" + reasonVTEPNotFound = "VTEPNotFound" + reasonNADDeleted = 
"NetworkAttachmentDefinitionDeleted" + reasonNADSyncError = "NetworkAttachmentDefinitionSyncError" + + // MaxEVPNVIDs is the maximum number of VIDs available for EVPN networks (0-4094, but 0 and 1 are reserved). + MaxEVPNVIDs = 4095 + // reservedVIDZeroKey is the key used to reserve VID 0 (reserved per IEEE 802.1Q for priority tagging). + reservedVIDZeroKey = "__vid_zero_reserved__" + // reservedVIDOneKey is the key used to reserve VID 1 (default VLAN on many switches, avoided by convention). + reservedVIDOneKey = "__vid_one_reserved__" +) + +// macVRFKey returns the VID allocator key for a network's MAC-VRF. +func macVRFKey(networkName string) string { + return networkName + "/macvrf" +} + +// ipVRFKey returns the VID allocator key for a network's IP-VRF. +func ipVRFKey(networkName string) string { + return networkName + "/ipvrf" +} -type RenderNetAttachDefManifest func(obj client.Object, targetNamespace string) (*netv1.NetworkAttachmentDefinition, error) +type RenderNetAttachDefManifest func(obj client.Object, targetNamespace string, opts ...template.RenderOption) (*netv1.NetworkAttachmentDefinition, error) type networkInUseError struct { err error @@ -55,6 +85,15 @@ func (n *networkInUseError) Error() string { return n.err.Error() } +// vtepNotFoundError indicates that a required VTEP CR does not exist. +type vtepNotFoundError struct { + vtepName string +} + +func (e *vtepNotFoundError) Error() string { + return fmt.Sprintf("VTEP %q does not exist", e.vtepName) +} + type Controller struct { // cudnController manage ClusterUserDefinedNetwork CRs. cudnController controller.Controller @@ -76,6 +115,10 @@ type Controller struct { networkManager networkmanager.Interface + // vidAllocator allocates cluster-wide VLAN IDs for EVPN networks. + // VIDs are allocated per network name and stored in the NAD config JSON. + vidAllocator id.Allocator + udnClient userdefinednetworkclientset.Interface udnLister userdefinednetworklister.UserDefinedNetworkLister cudnLister userdefinednetworklister.ClusterUserDefinedNetworkLister @@ -83,6 +126,10 @@ type Controller struct { nadLister netv1lister.NetworkAttachmentDefinitionLister podInformer corev1informer.PodInformer namespaceInformer corev1informer.NamespaceInformer + // vtepLister provides read access to VTEP CRs for validating EVPN configuration. + vtepLister vteplister.VTEPLister + // vtepNotifier notifies subscribing controllers about VTEP events. + vtepNotifier *notifier.VTEPNotifier networkInUseRequeueInterval time.Duration eventRecorder record.EventRecorder @@ -98,10 +145,15 @@ func New( networkManager networkmanager.Interface, podInformer corev1informer.PodInformer, namespaceInformer corev1informer.NamespaceInformer, + vtepInformer vtepinformer.VTEPInformer, eventRecorder record.EventRecorder, ) *Controller { udnLister := udnInformer.Lister() cudnLister := cudnInformer.Lister() + + // Allocates VIDs in range 1-4094 (0 is reserved per IEEE 802.1Q). 
+ vidAllocator := id.NewIDAllocator("EVPN-VIDs", MaxEVPNVIDs) + c := &Controller{ nadClient: nadClient, nadLister: nadInfomer.Lister(), @@ -113,6 +165,7 @@ func New( namespaceInformer: namespaceInformer, networkManager: networkManager, namespaceTracker: map[string]sets.Set[string]{}, + vidAllocator: vidAllocator, eventRecorder: eventRecorder, } udnCfg := &controller.ControllerConfig[userdefinednetworkv1.UserDefinedNetwork]{ @@ -138,18 +191,30 @@ func New( c.nadNotifier = notifier.NewNetAttachDefNotifier(nadInfomer, c) c.namespaceNotifier = notifier.NewNamespaceNotifier(namespaceInformer, c) + // Setup EVPN components only when EVPN is enabled. + if util.IsEVPNEnabled() && vtepInformer != nil { + // Setup VTEP watching for EVPN support. + c.vtepLister = vtepInformer.Lister() + c.vtepNotifier = notifier.NewVTEPNotifier(vtepInformer, c) + } + return c } func (c *Controller) Run() error { klog.Infof("Starting user-defined network controllers") - if err := controller.StartWithInitialSync( - c.initializeNamespaceTracker, + + controllers := []controller.Reconciler{ c.cudnController, c.udnController, c.nadNotifier.Controller, c.namespaceNotifier.Controller, - ); err != nil { + } + if c.vtepNotifier != nil { + controllers = append(controllers, c.vtepNotifier.Controller) + } + + if err := controller.StartWithInitialSync(c.initializeController, controllers...); err != nil { return fmt.Errorf("unable to start user-defined network controller: %v", err) } @@ -162,57 +227,233 @@ func (c *Controller) Run() error { return nil } -// initializeNamespaceTracker populates the namespace-tracker with NAD namespaces who owned by the controller. -func (c *Controller) initializeNamespaceTracker() error { - cudns, err := c.cudnLister.List(labels.Everything()) +// initializeController performs all startup initialization before controllers begin processing. +func (c *Controller) initializeController() error { + // Reserve VID 0 and VID 1 to ensure they're never allocated to any network. + // VID 0 is reserved per IEEE 802.1Q standard. + // VID 1 is the default VLAN on many switches and avoided by convention. + if err := c.vidAllocator.ReserveID(reservedVIDZeroKey, 0); err != nil { + return fmt.Errorf("failed to reserve VID 0: %w", err) + } + if err := c.vidAllocator.ReserveID(reservedVIDOneKey, 1); err != nil { + return fmt.Errorf("failed to reserve VID 1: %w", err) + } + + cudnNADs, err := c.buildCUDNToNADs() if err != nil { return err } - if len(cudns) == 0 { + if len(cudnNADs) == 0 { return nil } + c.initializeNamespaceTracker(cudnNADs) + if util.IsEVPNEnabled() { + // Recover VID allocations from existing EVPN CUDNs. + // Recovery failures are logged and the affected CUDNs are enqueued for reconciliation, + // but don't block startup - this prevents a DoS where a malicious NAD could + // crash the entire cluster-manager. + c.recoverEVPNVIDs(cudnNADs) + } + + return nil +} + +// cudnWithNADs pairs a CUDN with its owned NADs. +type cudnWithNADs struct { + cudn *userdefinednetworkv1.ClusterUserDefinedNetwork + nads []netv1.NetworkAttachmentDefinition +} + +// cudnToNADs maps CUDN name to its object and owned NADs. +type cudnToNADs map[string]*cudnWithNADs + +// buildCUDNToNADs builds an index of CUDNs to their owned NADs. 
+// It returns an entry for every existing CUDN, including CUDNs that currently own no NADs +func (c *Controller) buildCUDNToNADs() (cudnToNADs, error) { + cudns, err := c.cudnLister.List(labels.Everything()) + if err != nil { + return nil, err + } + if len(cudns) == 0 { + return nil, nil + } + nads, err := c.nadLister.List(labels.Everything()) if err != nil { - return err + return nil, err } - if len(nads) == 0 { - return nil + + cudnByUID := make(map[types.UID]*userdefinednetworkv1.ClusterUserDefinedNetwork, len(cudns)) + index := make(cudnToNADs, len(cudns)) + for _, cudn := range cudns { + cudnByUID[cudn.UID] = cudn + index[cudn.Name] = &cudnWithNADs{cudn: cudn} } - indexedNADs := map[string]netv1.NetworkAttachmentDefinition{} + for _, nad := range nads { - if nad != nil { - indexedNADs[nad.Namespace+"/"+nad.Name] = *nad.DeepCopy() + if nad == nil { + continue + } + controllerRef := metav1.GetControllerOfNoCopy(nad) + if controllerRef == nil { + continue + } + if cudn, ok := cudnByUID[controllerRef.UID]; ok { + index[cudn.Name].nads = append(index[cudn.Name].nads, *nad.DeepCopy()) } } - for _, cudn := range cudns { - c.namespaceTracker[cudn.Name] = sets.New[string]() + return index, nil +} - for nadKey, nad := range indexedNADs { - if !metav1.IsControlledBy(&nad, cudn) { - continue - } - c.namespaceTracker[cudn.Name].Insert(nad.Namespace) +// initializeNamespaceTracker populates the namespace tracker with NAD namespaces owned by each CUDN. +func (c *Controller) initializeNamespaceTracker(cudnNADs cudnToNADs) { + for cudnName, entry := range cudnNADs { + c.namespaceTracker[cudnName] = sets.New[string]() + for _, nad := range entry.nads { + c.namespaceTracker[cudnName].Insert(nad.Namespace) + } + } +} - // Usually we don't want to mutate an iterated map, in this case - // the processed entry is removed because it shouldn't be processed - // again and not expected to be visited again, i.e.: the NAD should - // be recorded by the namespaceTracker once. - delete(indexedNADs, nadKey) +// recoverEVPNVIDs recovers VID allocations from existing EVPN CUDNs using +// NetworkManager's cached NetInfo. NetworkManager has already processed all NADs +// by the time this function is called (it starts before UDN controller). +// +// CUDNs are processed in order of creation timestamp (oldest first) to ensure +// deterministic VID assignment when conflicts occur. If two CUDNs have NADs +// claiming the same VID, the oldest CUDN wins ("first come, first served"). +// CUDN name is used as tie-breaker when timestamps are equal. +// +// If VID recovery fails for a CUDN (e.g., NetworkManager couldn't parse the NAD), +// this logs an error and enqueues the CUDN for reconciliation. +func (c *Controller) recoverEVPNVIDs(cudnNADs cudnToNADs) { + // Extract EVPN CUDNs with NADs into a slice for deterministic ordering. + evpnCUDNs := make([]*cudnWithNADs, 0, len(cudnNADs)) + for _, entry := range cudnNADs { + if entry.cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + continue + } + if len(entry.nads) == 0 { + klog.V(4).Infof("EVPN CUDN %s has no NADs, skipping VID recovery", entry.cudn.Name) + continue } + evpnCUDNs = append(evpnCUDNs, entry) } + // Sort by creation timestamp (oldest first) for deterministic conflict resolution. + // When two CUDNs have conflicting VIDs, the oldest one wins. + // Use name as tie-breaker when timestamps are equal for consistent ordering. 
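+	// For example (hypothetical timeline): if CUDN "a" (created 10:00) and CUDN "b"
+	// (created 10:05) both have NADs claiming VID 5, "a" sorts first and reserves
+	// VID 5; "b"'s reservation then fails with a conflict, so "b" is enqueued for
+	// reconciliation and is allocated a fresh VID there.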
+ slices.SortFunc(evpnCUDNs, func(a, b *cudnWithNADs) int { + if a.cudn.CreationTimestamp.Before(&b.cudn.CreationTimestamp) { + return -1 + } + if b.cudn.CreationTimestamp.Before(&a.cudn.CreationTimestamp) { + return 1 + } + return strings.Compare(a.cudn.Name, b.cudn.Name) + }) + + for _, entry := range evpnCUDNs { + if err := c.recoverEVPNVIDsForCUDN(entry.cudn.Name); err != nil { + klog.Errorf("VID recovery failed for EVPN CUDN %s: %v. "+ + "The CUDN will be reconciled and existing NAD VIDs will be preserved if possible.", + entry.cudn.Name, err) + c.cudnController.Reconcile(entry.cudn.Name) + } + } +} + +// recoverEVPNVIDsForCUDN attempts to recover VIDs for a single CUDN using NetworkManager's cache. +// Returns nil if VIDs were successfully recovered or if no VIDs are allocated yet. +// Returns error if VID reservation fails (e.g., conflict with another network). +func (c *Controller) recoverEVPNVIDsForCUDN(cudnName string) error { + networkName := util.GenerateCUDNNetworkName(cudnName) + + // Use NetworkManager's cached NetInfo - it has already parsed the NAD + netInfo := c.networkManager.GetNetwork(networkName) + if netInfo == nil { + // NetworkManager doesn't have this network cached. This can happen if: + // - NetworkManager failed to parse the NAD (corrupted) + // - NAD doesn't exist yet + return fmt.Errorf("network %s not found in NetworkManager cache", networkName) + } + + macVRFVID := netInfo.EVPNMACVRFVID() + ipVRFVID := netInfo.EVPNIPVRFVID() + + // Check if this network has EVPN VIDs allocated + if macVRFVID == 0 && ipVRFVID == 0 { + klog.V(4).Infof("EVPN CUDN %s has no VIDs allocated yet, skipping recovery", cudnName) + return nil // No VIDs to recover + } + + if err := c.reserveRecoveredVIDs(cudnName, macVRFVID, ipVRFVID); err != nil { + return fmt.Errorf("failed to reserve VIDs for cudn %s: %w", cudnName, err) + } + + klog.V(4).Infof("Recovered VIDs for CUDN %s (macVRF=%d, ipVRF=%d)", cudnName, macVRFVID, ipVRFVID) return nil } +// reserveRecoveredVIDs reserves the given VIDs in the allocator for a network. +// VIDs of 0 are skipped (not allocated). +// +// Both VIDs are attempted even if one fails - this maximizes recovery and protects +// as many VIDs as possible. We don't release successfully reserved VIDs on partial +// failure because they represent state that already exists in NADs; releasing them +// could allow another network to "steal" the VID, causing route leakage. +func (c *Controller) reserveRecoveredVIDs(networkName string, macVRFVID, ipVRFVID int) error { + var errs []error + + if macVRFVID > 0 { + if err := c.vidAllocator.ReserveID(macVRFKey(networkName), macVRFVID); err != nil { + errs = append(errs, fmt.Errorf("failed to reserve VID %d for MAC-VRF of network %s: %w", macVRFVID, networkName, err)) + } else { + klog.V(4).Infof("Recovered VID %d for MAC-VRF of network %s", macVRFVID, networkName) + } + } + if ipVRFVID > 0 { + if err := c.vidAllocator.ReserveID(ipVRFKey(networkName), ipVRFVID); err != nil { + errs = append(errs, fmt.Errorf("failed to reserve VID %d for IP-VRF of network %s: %w", ipVRFVID, networkName, err)) + } else { + klog.V(4).Infof("Recovered VID %d for IP-VRF of network %s", ipVRFVID, networkName) + } + } + + return errors.Join(errs...) +} + +// releaseVIDForNetwork releases the VIDs allocated for a network's VRFs. +// +// NOTE: VID release is not synchronized with node-side dataplane cleanup. 
+// In theory, a rapidly created new network could get the same VID while nodes
+// are still tearing down the old network's bridge configuration. In practice,
+// VID collisions are unlikely because the allocator is monotonic and won't
+// reallocate the same VID unless the pool fills up or CUDNs are recycled rapidly.
+// The actual mitigation is on the node-side: nodes should check for VID conflicts
+// and refuse to configure a VID already in use by a different network, waiting
+// until the old network is cleaned up.
+func (c *Controller) releaseVIDForNetwork(networkName string) {
+	macVID := c.vidAllocator.ReleaseID(macVRFKey(networkName))
+	ipVID := c.vidAllocator.ReleaseID(ipVRFKey(networkName))
+	if macVID >= 0 || ipVID >= 0 {
+		klog.V(4).Infof("Released VIDs for network %s: MAC-VRF=%d, IP-VRF=%d", networkName, macVID, ipVID)
+	}
+}
+
 func (c *Controller) Shutdown() {
-	controller.Stop(
+	controllers := []controller.Reconciler{
 		c.cudnController,
 		c.udnController,
 		c.nadNotifier.Controller,
 		c.namespaceNotifier.Controller,
-	)
+	}
+	if c.vtepNotifier != nil {
+		controllers = append(controllers, c.vtepNotifier.Controller)
+	}
+	controller.Stop(controllers...)
 }
 
 // ReconcileNetAttachDef enqueue NAD requests following NAD events.
@@ -283,7 +524,7 @@ func (c *Controller) ReconcileNamespace(key string) error {
 	if !affectedNamespace {
 		cudn, err := c.cudnLister.Get(cudnName)
 		if err != nil {
-			return fmt.Errorf("faild to get CUDN %q from cache: %w", cudnName, err)
+			return fmt.Errorf("failed to get CUDN %q from cache: %w", cudnName, err)
 		}
 		cudnSelector, err := metav1.LabelSelectorAsSelector(&cudn.Spec.NamespaceSelector)
 		if err != nil {
@@ -488,19 +729,19 @@ func newNetworkCreatedCondition(nad *netv1.NetworkAttachmentDefinition, syncErro
 	networkCreatedCondition := &metav1.Condition{
 		Type:               conditionTypeNetworkCreated,
 		Status:             metav1.ConditionTrue,
-		Reason:             "NetworkAttachmentDefinitionCreated",
+		Reason:             reasonNADCreated,
 		Message:            "NetworkAttachmentDefinition has been created",
 		LastTransitionTime: now,
 	}
 
 	if nad != nil && !nad.DeletionTimestamp.IsZero() {
 		networkCreatedCondition.Status = metav1.ConditionFalse
-		networkCreatedCondition.Reason = "NetworkAttachmentDefinitionDeleted"
+		networkCreatedCondition.Reason = reasonNADDeleted
 		networkCreatedCondition.Message = "NetworkAttachmentDefinition is being deleted"
 	}
 
 	if syncError != nil {
 		networkCreatedCondition.Status = metav1.ConditionFalse
-		networkCreatedCondition.Reason = "SyncError"
+		networkCreatedCondition.Reason = reasonSyncError
 		networkCreatedCondition.Message = syncError.Error()
 	}
 
@@ -511,8 +752,8 @@ func (c *Controller) cudnNeedUpdate(_ *userdefinednetworkv1.ClusterUserDefinedNe
 	return true
 }
 
-// reconcileUDN get ClusterUserDefinedNetwork CR key and reconcile it according to spec.
-// It creates NADs according to spec at the spesified selected namespaces.
+// reconcileCUDN receives a ClusterUserDefinedNetwork CR key and reconciles the CR according to its spec.
+// It creates NADs according to spec at the specified selected namespaces.
 // The NAD objects are created with the same key as the request CR, having both kinds have the same key enable
 // the controller to act on NAD changes as well and reconciles NAD objects (e.g: in case NAD is deleted it will be re-created).
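+// For example (hypothetical flow): deleting the NAD backing CUDN "blue" triggers
+// ReconcileNetAttachDef, which enqueues the owning CUDN; reconcileCUDN then
+// re-renders the manifest and re-creates the NAD, and for EVPN networks the
+// idempotent VID allocator returns the VIDs the NAD held before.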
func (c *Controller) reconcileCUDN(key string) error { @@ -539,6 +780,14 @@ func (c *Controller) reconcileCUDN(key string) error { return updateStatusErr } + // vtepNotFoundError is non-fatal: the status has been updated to reflect + // the missing VTEP, and the VTEPNotifier will re-queue this CUDN when + // the VTEP is created. No need to return an error that would cause retries. + var vtepNotFound *vtepNotFoundError + if errors.As(syncErr, &vtepNotFound) { + return updateStatusErr + } + return errors.Join(syncErr, updateStatusErr) } @@ -596,6 +845,7 @@ func (c *Controller) syncClusterUDN(cudn *userdefinednetworkv1.ClusterUserDefine delete(c.namespaceTracker, cudnName) metrics.DecrementCUDNCount(role, topology) metrics.DeleteDynamicUDNNodeCount(util.GenerateCUDNNetworkName(cudn.Name)) + c.releaseVIDForNetwork(cudnName) } return nil, nil @@ -616,6 +866,10 @@ func (c *Controller) syncClusterUDN(cudn *userdefinednetworkv1.ClusterUserDefine metrics.IncrementCUDNCount(role, topology) } + if err := c.validateEVPNVTEP(cudn); err != nil { + return nil, err + } + selectedNamespaces, err := c.getSelectedNamespaces(cudn.Spec.NamespaceSelector) if err != nil { return nil, fmt.Errorf("failed to get selected namespaces: %w", err) @@ -673,7 +927,7 @@ func (c *Controller) updateClusterUDNStatus(cudn *userdefinednetworkv1.ClusterUs return strings.Compare(a.Namespace, b.Namespace) }) - networkCreatedCondition := newClusterNetworCreatedCondition(nads, syncError) + networkCreatedCondition := newClusterNetworkCreatedCondition(nads, syncError) updated := meta.SetStatusCondition(&cudn.Status.Conditions, networkCreatedCondition) if !updated { @@ -707,7 +961,7 @@ func (c *Controller) updateClusterUDNStatus(cudn *userdefinednetworkv1.ClusterUs return nil } -func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, syncError error) metav1.Condition { +func newClusterNetworkCreatedCondition(nads []netv1.NetworkAttachmentDefinition, syncError error) metav1.Condition { var namespaces []string for _, nad := range nads { namespaces = append(namespaces, nad.Namespace) @@ -718,7 +972,7 @@ func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, condition := metav1.Condition{ Type: conditionTypeNetworkCreated, Status: metav1.ConditionTrue, - Reason: "NetworkAttachmentDefinitionCreated", + Reason: reasonNADCreated, Message: fmt.Sprintf("NetworkAttachmentDefinition has been created in following namespaces: [%s]", affectedNamespaces), LastTransitionTime: now, } @@ -731,15 +985,80 @@ func newClusterNetworCreatedCondition(nads []netv1.NetworkAttachmentDefinition, } if len(deletedNadKeys) > 0 { condition.Status = metav1.ConditionFalse - condition.Reason = "NetworkAttachmentDefinitionDeleted" + condition.Reason = reasonNADDeleted condition.Message = fmt.Sprintf("NetworkAttachmentDefinition are being deleted: %v", deletedNadKeys) } if syncError != nil { condition.Status = metav1.ConditionFalse - condition.Reason = "NetworkAttachmentDefinitionSyncError" - condition.Message = syncError.Error() + + // Check for specific error types to provide better status reasons + var vtepNotFound *vtepNotFoundError + if errors.As(syncError, &vtepNotFound) { + condition.Reason = reasonVTEPNotFound + condition.Message = fmt.Sprintf("Cannot create network: VTEP '%s' does not exist. 
"+ + "Create the VTEP CR first or update the CUDN to reference an existing VTEP.", + vtepNotFound.vtepName) + } else { + condition.Reason = reasonNADSyncError + condition.Message = syncError.Error() + } } return condition } + +// validateEVPNVTEP validates EVPN configuration for a CUDN. +// Returns an error if EVPN is requested but disabled, or if the referenced VTEP doesn't exist. +func (c *Controller) validateEVPNVTEP(cudn *userdefinednetworkv1.ClusterUserDefinedNetwork) error { + if cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + return nil // Not an EVPN network + } + + if !util.IsEVPNEnabled() { + return fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + + // CEL validation ensures EVPN is set when transport is EVPN. + vtepName := cudn.Spec.Network.EVPN.VTEP + _, err := c.vtepLister.Get(vtepName) + if err != nil { + if apierrors.IsNotFound(err) { + return &vtepNotFoundError{vtepName: vtepName} + } + return fmt.Errorf("failed to get VTEP %q: %w", vtepName, err) + } + + return nil +} + +// ReconcileVTEP handles VTEP events by re-queuing all CUDNs that reference the VTEP. +// +// This uses O(n) iteration over all CUDNs rather than maintaining an index because: +// VTEP create/delete events are expected to be rare; scanning all CUDNs from the +// informer cache keeps the logic simple. If this becomes a hot path at large +// CUDN counts, add an informer indexer keyed by VTEP. +func (c *Controller) ReconcileVTEP(vtepName string) error { + cudns, err := c.cudnLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to list CUDNs: %w", err) + } + + for _, cudn := range cudns { + if cudnReferencesVTEP(cudn, vtepName) { + klog.V(4).InfoS("Re-queueing CUDN following VTEP event", "cudn", cudn.Name, "vtep", vtepName) + c.cudnController.Reconcile(cudn.Name) + } + } + + return nil +} + +// cudnReferencesVTEP returns true if the CUDN is an EVPN network referencing the given VTEP. +// CEL validation ensures EVPN is set when transport is EVPN. 
+func cudnReferencesVTEP(cudn *userdefinednetworkv1.ClusterUserDefinedNetwork, vtepName string) bool { + if cudn.Spec.Network.Transport != userdefinednetworkv1.TransportOptionEVPN { + return false + } + return cudn.Spec.Network.EVPN.VTEP == vtepName +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go index 735b0afea2..127552ac93 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go @@ -16,6 +16,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" + userdefinednetworkv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" @@ -35,16 +36,22 @@ func (c *Controller) updateNAD(obj client.Object, namespace string) (*netv1.Netw } } - desiredNAD, err := c.renderNadFn(obj, namespace) + existingNAD, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(obj.GetName()) + if err != nil && !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("failed to get NetworkAttachmentDefinition %s/%s from cache: %v", namespace, obj.GetName(), err) + } + + renderOpts, err := c.allocateEVPNVIDsIfNeeded(obj) if err != nil { - return nil, fmt.Errorf("failed to generate NetworkAttachmentDefinition: %w", err) + return nil, fmt.Errorf("failed to allocate EVPN VIDs: %w", err) } - nad, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(obj.GetName()) - if err != nil && !apierrors.IsNotFound(err) { - return nil, fmt.Errorf("failed to get NetworkAttachmentDefinition %s/%s from cache: %v", namespace, obj.GetName(), err) + desiredNAD, err := c.renderNadFn(obj, namespace, renderOpts...) + if err != nil { + return nil, fmt.Errorf("failed to generate NetworkAttachmentDefinition: %w", err) } - nadCopy := nad.DeepCopy() + + nadCopy := existingNAD.DeepCopy() if nadCopy == nil { // creating NAD in case no primary network exist should be atomic and synchronized with @@ -119,7 +126,7 @@ func (c *Controller) deleteNAD(obj client.Object, namespace string) error { pods, err := c.podInformer.Lister().Pods(nadCopy.Namespace).List(labels.Everything()) if err != nil { - return fmt.Errorf("failed to list pods at target namesapce %q: %w", nadCopy.Namespace, err) + return fmt.Errorf("failed to list pods at target namespace %q: %w", nadCopy.Namespace, err) } // This is best-effort check no pod using the subject NAD, // noting prevent a from being pod creation right after this check. @@ -142,3 +149,55 @@ func (c *Controller) deleteNAD(obj client.Object, namespace string) error { return nil } + +// allocateEVPNVIDsIfNeeded checks if the object is an EVPN network and allocates VIDs if needed. +// Returns render options containing the allocated VIDs, or empty options for non-EVPN networks. +// Returns an error if EVPN transport is requested but the feature flag is disabled. +// +// This function relies on the idempotency of AllocateID: if a VID was already allocated for a key +// (either during recovery or a previous reconciliation), AllocateID returns the same VID. +// This means VIDs are stable across reconciliations without needing to parse the existing NAD. 
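+//
+// A minimal sketch of that idempotency (network name hypothetical):
+//
+//	vid1, _ := c.vidAllocator.AllocateID(macVRFKey("blue")) // first reconciliation allocates
+//	vid2, _ := c.vidAllocator.AllocateID(macVRFKey("blue")) // later reconciliations: vid2 == vid1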
+func (c *Controller) allocateEVPNVIDsIfNeeded(obj client.Object) ([]template.RenderOption, error) { + spec := template.GetSpec(obj) + if spec.GetTransport() != userdefinednetworkv1.TransportOptionEVPN { + return nil, nil + } + + // EVPN transport is requested - ensure the feature is enabled. + if !util.IsEVPNEnabled() { + return nil, fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + + evpnCfg := spec.GetEVPN() + if evpnCfg == nil { + return nil, nil + } + + networkName := obj.GetName() + var macVRFVID, ipVRFVID int + + // Allocate VID for MAC-VRF if present + if evpnCfg.MACVRF != nil { + vid, err := c.vidAllocator.AllocateID(macVRFKey(networkName)) + if err != nil { + return nil, fmt.Errorf("failed to allocate VID for MAC-VRF: %w", err) + } + macVRFVID = vid + klog.V(4).InfoS("Allocated VID for MAC-VRF", "network", networkName, "vid", vid) + } + + // Allocate VID for IP-VRF if present + if evpnCfg.IPVRF != nil { + vid, err := c.vidAllocator.AllocateID(ipVRFKey(networkName)) + if err != nil { + return nil, fmt.Errorf("failed to allocate VID for IP-VRF: %w", err) + } + ipVRFVID = vid + klog.V(4).InfoS("Allocated VID for IP-VRF", "network", networkName, "vid", vid) + } + + // Return render options with allocated VIDs. + // Note: API validation ensures at least one of macVRF or ipVRF is specified, + // so at least one VID will be allocated if we reach here. + return []template.RenderOption{template.WithEVPNVIDs(macVRFVID, ipVRFVID)}, nil +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 2a07e96dbe..166931625d 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -2,6 +2,7 @@ package userdefinednetwork import ( "context" + "encoding/json" "errors" "fmt" "strings" @@ -22,10 +23,13 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" udnclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" udnfakeclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -39,6 +43,7 @@ var _ = Describe("User Defined Network Controller", func() { var ( cs *util.OVNClusterManagerClientset f *factory.WatchFactory + nm networkmanager.Controller ) BeforeEach(func() { @@ -46,9 +51,16 @@ var _ = Describe("User Defined Network Controller", func() { Expect(config.PrepareTestConfig()).To(Succeed()) config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableNetworkSegmentation = true + // Enable EVPN for EVPN-related tests + config.OVNKubernetesFeature.EnableRouteAdvertisements = true + 
config.OVNKubernetesFeature.EnableEVPN = true }) AfterEach(func() { + if nm != nil { + nm.Stop() + nm = nil + } if f != nil { f.Shutdown() } @@ -65,7 +77,30 @@ var _ = Describe("User Defined Network Controller", func() { Expect(err).NotTo(HaveOccurred()) return New(cs.NetworkAttchDefClient, f.NADInformer(), cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(), - renderNADStub, networkManager.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), nil, + renderNADStub, networkManager.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), f.VTEPInformer(), nil, + ) + } + + // newTestControllerWithNetworkManager creates a controller with a started NetworkManager. + newTestControllerWithNetworkManager := func(renderNADStub RenderNetAttachDefManifest, objects ...runtime.Object) *Controller { + cs = util.GetOVNClientset(objects...).GetClusterManagerClientset() + var err error + f, err = factory.NewClusterManagerWatchFactory(cs) + Expect(err).NotTo(HaveOccurred()) + Expect(f.Start()).To(Succeed()) + + nm, err = networkmanager.NewForCluster(&networkmanager.FakeControllerManager{}, f, cs, nil, id.NewTunnelKeyAllocator("TunnelKeys")) + Expect(err).NotTo(HaveOccurred()) + // Start NetworkManager - it will process existing NADs and cache their VIDs + Expect(nm.Start()).To(Succeed()) + + var vtepInformer vtepinformer.VTEPInformer + if util.IsEVPNEnabled() { + vtepInformer = f.VTEPInformer() + } + return New(cs.NetworkAttchDefClient, f.NADInformer(), + cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(), + renderNADStub, nm.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), vtepInformer, nil, ) } @@ -445,6 +480,827 @@ var _ = Describe("User Defined Network Controller", func() { } }) + It("should allocate VID for EVPN network NAD", func() { + testNs := testNamespace("evpn-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-test]", + }})) + + // Verify VID was allocated in the NAD config + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "VID should be allocated for EVPN MAC-VRF (first available after 0,1 reserved)") + }).Should(Succeed()) + }) + + It("should allocate VID for EVPN network NAD with IP-VRF only", func() { + testNs := testNamespace("evpn-ipvrf-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNIPVRFClusterUDN("evpn-ipvrf-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = 
cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-ipvrf-test]", + }})) + + // Verify VID was allocated in the NAD config (IP-VRF only, no MAC-VRF) + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(0), "MAC-VRF should not be present for IP-VRF only config") + g.Expect(ipVID).To(Equal(2), "VID should be allocated for EVPN IP-VRF only (first available after 0,1 reserved)") + }).Should(Succeed()) + }) + + It("should allocate separate VIDs for EVPN network with both MAC-VRF and IP-VRF (symmetric IRB)", func() { + testNs := testNamespace("evpn-irb-test") + vtep := testVTEP("vtep-test") + cudn := testSymmetricIRBClusterUDN("evpn-irb-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + var err error + cudn, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-irb-test]", + }})) + + // Verify both VIDs were allocated with different values + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "MAC-VRF should get VID 2 (first available)") + g.Expect(ipVID).To(Equal(3), "IP-VRF should get VID 3") + }).Should(Succeed()) + }) + + It("should allocate different VIDs for multiple EVPN networks", func() { + testNs := testNamespace("evpn-multi-test") + vtep := testVTEP("vtep-test") + cudn1 := testEVPNClusterUDN("evpn-cudn-1", vtep.Name, testNs.Name) + cudn2 := testEVPNClusterUDN("evpn-cudn-2", vtep.Name, testNs.Name) + cudn2.UID = "2" // Different UID for second CUDN + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn1, cudn2, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for both NADs to be created and have VIDs, and verify they are different + Eventually(func(g Gomega) { + nad1, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), "evpn-cudn-1", metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + nad2, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), "evpn-cudn-2", metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + vid1, _ := evpnVIDsFromNAD(nad1) + vid2, _ := evpnVIDsFromNAD(nad2) + g.Expect(vid1).To(BeNumerically(">", 0), "NAD 1 
should have VID allocated") + g.Expect(vid2).To(BeNumerically(">", 0), "NAD 2 should have VID allocated") + // VIDs should be different from each other + // Note: Order is non-deterministic due to concurrent CUDN processing + g.Expect(vid1).NotTo(Equal(vid2), "VIDs should be different for different networks") + }).Should(Succeed()) + }) + + It("should release VID when EVPN CUDN is deleted", func() { + testNs := testNamespace("evpn-delete-test") + vtep := testVTEP("vtep-test") + cudn := testEVPNClusterUDN("evpn-delete-cudn", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for CUDN to be processed and NAD created with VID + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, _ := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "First CUDN should get VID 2 (first available)") + }).Should(Succeed()) + + // Verify VID is allocated in the controller's allocator + Expect(c.vidAllocator.GetID("evpn-delete-cudn/macvrf")).To(BeNumerically(">=", 0), "VID should be allocated") + + // Trigger deletion by setting DeletionTimestamp and processing + now := metav1.Now() + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + cudn.DeletionTimestamp = &now + _, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Wait for finalizer to be removed (indicating deletion was processed) + Eventually(func(g Gomega) { + updatedCUDN, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updatedCUDN.Finalizers).To(BeEmpty(), "Finalizer should be removed after deletion") + // Verify VID is released from the allocator + g.Expect(c.vidAllocator.GetID("evpn-delete-cudn/macvrf")).To(Equal(-1), "VID should be released after deletion") + }).Should(Succeed()) + }) + + It("should release both MAC-VRF and IP-VRF VIDs when symmetric IRB CUDN is deleted", func() { + testNs := testNamespace("evpn-irb-delete-test") + vtep := testVTEP("vtep-irb-delete") + cudn := testSymmetricIRBClusterUDN("evpn-irb-delete", vtep.Name, testNs.Name) + + c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep) + Expect(c.Run()).To(Succeed()) + + // Wait for CUDN to be processed and NAD created with both VIDs + Eventually(func(g Gomega) { + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + macVID, ipVID := evpnVIDsFromNAD(nad) + g.Expect(macVID).To(Equal(2), "MAC-VRF VID should be allocated (first available)") + g.Expect(ipVID).To(Equal(3), "IP-VRF VID should be allocated") + }).Should(Succeed()) + + // Verify both VIDs are allocated in the controller's allocator + Expect(c.vidAllocator.GetID("evpn-irb-delete/macvrf")).To(Equal(2), "MAC-VRF VID should be allocated (first available)") + Expect(c.vidAllocator.GetID("evpn-irb-delete/ipvrf")).To(Equal(3), "IP-VRF VID should be allocated") + + // Trigger deletion + now 
+
+		It("should release both MAC-VRF and IP-VRF VIDs when symmetric IRB CUDN is deleted", func() {
+			testNs := testNamespace("evpn-irb-delete-test")
+			vtep := testVTEP("vtep-irb-delete")
+			cudn := testSymmetricIRBClusterUDN("evpn-irb-delete", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Wait for CUDN to be processed and NAD created with both VIDs
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, ipVID := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "MAC-VRF VID should be allocated (first available)")
+				g.Expect(ipVID).To(Equal(3), "IP-VRF VID should be allocated")
+			}).Should(Succeed())
+
+			// Verify both VIDs are allocated in the controller's allocator
+			Expect(c.vidAllocator.GetID("evpn-irb-delete/macvrf")).To(Equal(2), "MAC-VRF VID should be allocated (first available)")
+			Expect(c.vidAllocator.GetID("evpn-irb-delete/ipvrf")).To(Equal(3), "IP-VRF VID should be allocated")
+
+			// Trigger deletion
+			now := metav1.Now()
+			cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			cudn.DeletionTimestamp = &now
+			_, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Wait for finalizer to be removed and verify both VIDs are released
+			Eventually(func(g Gomega) {
+				updatedCUDN, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				g.Expect(updatedCUDN.Finalizers).To(BeEmpty(), "Finalizer should be removed after deletion")
+				// Verify both VIDs are released from the allocator
+				g.Expect(c.vidAllocator.GetID("evpn-irb-delete/macvrf")).To(Equal(-1), "MAC-VRF VID should be released after deletion")
+				g.Expect(c.vidAllocator.GetID("evpn-irb-delete/ipvrf")).To(Equal(-1), "IP-VRF VID should be released after deletion")
+			}).Should(Succeed())
+		})
+
+		It("should preserve allocated VID when EVPN CUDN is updated", func() {
+			testNs := testNamespace("evpn-update-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-update-cudn", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Wait for initial VID allocation
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "VID should be allocated (first available)")
+			}).Should(Succeed())
+
+			// Update CUDN (trigger reconciliation)
+			cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			cudn.Annotations = map[string]string{"updated": "true"}
+			_, err = cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Ensure VID remains the same after reconciliation
+			Consistently(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "VID should remain consistent after CUDN update")
+			}, 500*time.Millisecond, 50*time.Millisecond).Should(Succeed())
+		})
+
+		It("should continue startup and allocate new VID when all NADs are corrupted", func() {
+			// VID recovery failures no longer block startup to prevent DoS attacks
+			// via malicious NADs. Instead, the CUDN is enqueued for reconciliation
+			// and a new VID is allocated.
+			testNs := testNamespace("evpn-all-corrupted-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-all-corrupted", vtep.Name, testNs.Name)
+
+			// Create a corrupted NAD owned by the CUDN - NetworkManager will fail to parse it
+			corruptedNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 0, 0)
+			corruptedNAD.Spec.Config = `{"transport":"evpn", invalid json - corrupted`
+
+			// Use started NetworkManager - it will fail to parse the corrupted NAD
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, corruptedNAD)
+
+			// Controller should start successfully (VID recovery failure logged but not fatal)
+			Expect(c.Run()).To(Succeed())
+
+			// The CUDN is enqueued for reconciliation and gets a new VID
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "Should allocate new VID since recovery failed (first available)")
+			}).Should(Succeed())
+		})
+
+		It("should continue startup and allocate new VID when VID recovery encounters a conflict", func() {
+			// VID conflicts during recovery no longer block startup.
+			// Instead, the CUDN is enqueued for reconciliation and gets a new VID.
+			testNs := testNamespace("evpn-vid-conflict-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-conflict", vtep.Name, testNs.Name)
+
+			// Create a NAD with VID 5 for MAC-VRF
+			existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 5, 0)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD)
+
+			// Pre-reserve VID 5 for a DIFFERENT key to create a conflict during recovery
+			Expect(c.vidAllocator.ReserveID("conflicting-network/macvrf", 5)).To(Succeed())
+
+			// Controller should start successfully despite the conflict
+			Expect(c.Run()).To(Succeed())
+
+			// Recovery fails due to conflict, CUDN is enqueued for reconciliation and gets a new VID
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "Should allocate new VID since 5 is taken by another network (first available)")
+			}).Should(Succeed())
+		})
+
+		It("should continue startup and preserve MAC-VRF VID when only IP-VRF VID recovery encounters a conflict", func() {
+			// When IP-VRF VID conflicts but MAC-VRF VID is available:
+			// - MAC-VRF recovery succeeds (VID reserved in allocator)
+			// - IP-VRF recovery fails (conflict)
+			// - CUDN is enqueued for reconciliation
+			// - MAC-VRF VID is preserved (already in allocator), IP-VRF gets new VID
+			testNs := testNamespace("evpn-ipvrf-conflict-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testSymmetricIRBClusterUDN("evpn-ipvrf-conflict", vtep.Name, testNs.Name)
+
+			// Create a symmetric IRB NAD with both MAC-VRF (VID 3) and IP-VRF (VID 7)
+			existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 3, 7)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD)
+
+			// Pre-reserve VID 7 for IP-VRF of a DIFFERENT network to create a conflict
+			Expect(c.vidAllocator.ReserveID("other-network/ipvrf", 7)).To(Succeed())
+
+			// Controller should start successfully
+			Expect(c.Run()).To(Succeed())
+
+			// MAC-VRF VID 3 was successfully reserved during recovery.
+			// IP-VRF VID 7 conflicted, so during reconciliation it gets new VID 2 (first available).
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, ipVID := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(3), "MAC-VRF VID should be preserved (recovery succeeded)")
+				g.Expect(ipVID).To(Equal(2), "IP-VRF gets new VID (first available, 0,1 reserved, 7 is taken)")
+			}).Should(Succeed())
+		})
+
+		It("should continue startup and preserve IP-VRF VID when only MAC-VRF VID recovery encounters a conflict", func() {
+			// When MAC-VRF VID conflicts but IP-VRF VID is available:
+			// - MAC-VRF recovery fails (conflict)
+			// - IP-VRF recovery succeeds (VID reserved in allocator)
+			// - CUDN is enqueued for reconciliation
+			// - MAC-VRF gets new VID, IP-VRF VID is preserved
+			testNs := testNamespace("evpn-macvrf-conflict-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testSymmetricIRBClusterUDN("evpn-macvrf-conflict", vtep.Name, testNs.Name)
+
+			// Create a symmetric IRB NAD with both MAC-VRF (VID 3) and IP-VRF (VID 7)
+			existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 3, 7)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD)
+
+			// Pre-reserve VID 3 for a DIFFERENT network to create a conflict during recovery
+			Expect(c.vidAllocator.ReserveID("other-network/macvrf", 3)).To(Succeed())
+
+			// Controller should start successfully
+			Expect(c.Run()).To(Succeed())
+
+			// IP-VRF VID 7 was successfully reserved during recovery.
+			// MAC-VRF VID 3 conflicted, so during reconciliation it gets new VID 2 (first available).
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, ipVID := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "MAC-VRF gets new VID (first available, 0,1 reserved, 3 is already taken)")
+				g.Expect(ipVID).To(Equal(7), "IP-VRF VID should be preserved (recovery succeeded)")
+			}).Should(Succeed())
+		})
+
+		It("should not fail startup when CUDN exists but has no NADs yet", func() {
+			vtep := testVTEP("vtep-test")
+			// Create a CUDN without any NADs (namespace doesn't match selector)
+			cudnWithNoNADs := testEVPNClusterUDN("evpn-no-nads", vtep.Name, "nonexistent-ns")
+
+			c = newTestControllerWithNetworkManager(renderNadStub(nil), cudnWithNoNADs, vtep)
+
+			Expect(c.Run()).To(Succeed(), "Controller should start even when CUDN has no NADs")
+
+			// No VID should be allocated since there are no NADs
+			Expect(c.vidAllocator.GetID("evpn-no-nads/macvrf")).To(Equal(-1), "No VID should be allocated for CUDN without NADs")
+		})
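
The corrupted-NAD and conflict tests above all pin down the same startup behavior: recovery reserves what it can, per VRF, and anything it cannot reserve is logged and re-queued instead of failing Run(). A sketch of that loop under assumed names (recoverVIDs, the tiny interfaces, and recoveredNAD are illustrative, not the controller's actual types):

```go
package sketch

import (
	"fmt"
	"log"
)

// Assumed minimal surfaces; the real controller wires its own allocator and queue.
type idAllocator interface {
	ReserveID(key string, id int) error
}

type enqueuer interface {
	Add(key string)
}

type recoveredNAD struct {
	ownerCUDN string // name of the owning ClusterUserDefinedNetwork
	macVID    int    // 0 means "not present in the NAD config"
	ipVID     int
}

// recoverVIDs reserves the VIDs found in existing NADs. A reservation failure
// (conflict, or a NAD that could not be parsed at all) is logged and the
// owning CUDN re-queued, never fatal, matching the "continue startup"
// behavior the tests above assert. Each VRF is handled independently, which
// is why a MAC-VRF reservation can survive an IP-VRF conflict.
func recoverVIDs(alloc idAllocator, queue enqueuer, nads []recoveredNAD) {
	for _, nad := range nads {
		for vrf, vid := range map[string]int{"macvrf": nad.macVID, "ipvrf": nad.ipVID} {
			if vid == 0 {
				continue
			}
			key := fmt.Sprintf("%s/%s", nad.ownerCUDN, vrf)
			if err := alloc.ReserveID(key, vid); err != nil {
				log.Printf("VID recovery for %s failed: %v; re-queueing CUDN", key, err)
				queue.Add(nad.ownerCUDN)
			}
		}
	}
}
```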
+
+		It("should recover VIDs from NetworkManager cache at startup", func() {
+			// This tests the production startup recovery path where:
+			// 1. NetworkManager is started and processes existing NADs
+			// 2. UDN controller starts and recovers VIDs from NetworkManager's cache
+			testNs := testNamespace("evpn-nm-recovery-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-nm-recovery", vtep.Name, testNs.Name)
+
+			// Create an existing NAD with VID 42 (simulating a previous controller run)
+			existingNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 42, 0)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep, existingNAD)
+			Expect(c.Run()).To(Succeed())
+
+			// VID should be recovered from NetworkManager cache at startup
+			Eventually(func() int {
+				return c.vidAllocator.GetID("evpn-nm-recovery/macvrf")
+			}).Should(Equal(42), "VID 42 should be recovered from NetworkManager cache at startup")
+		})
+
+		It("should recover VIDs in deterministic order based on CUDN creation timestamp", func() {
+			// When two CUDNs have NADs claiming the same VID, the older CUDN wins.
+			// This ensures deterministic behavior across restarts.
+			testNs1 := testNamespace("evpn-order-test-1")
+			testNs2 := testNamespace("evpn-order-test-2")
+			vtep := testVTEP("vtep-test")
+
+			// Create two CUDNs with different creation timestamps and unique UIDs
+			olderCUDN := testEVPNClusterUDN("aaa-older-cudn", vtep.Name, testNs1.Name)
+			olderCUDN.UID = "older-uid-1"
+			olderCUDN.CreationTimestamp = metav1.NewTime(time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC))
+
+			newerCUDN := testEVPNClusterUDN("zzz-newer-cudn", vtep.Name, testNs2.Name)
+			newerCUDN.UID = "newer-uid-2"
+			newerCUDN.CreationTimestamp = metav1.NewTime(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC))
+
+			// Both NADs claim VID 42 - this simulates a conflict scenario
+			olderNAD := testEVPNClusterUdnNADOwnedByCUDN(olderCUDN, testNs1.Name, vtep.Name, 42, 0)
+			newerNAD := testEVPNClusterUdnNADOwnedByCUDN(newerCUDN, testNs2.Name, vtep.Name, 42, 0)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest,
+				olderCUDN, newerCUDN, testNs1, testNs2, vtep, olderNAD, newerNAD)
+			Expect(c.Run()).To(Succeed())
+
+			// The older CUDN should win the VID 42, regardless of alphabetical name order
+			// (newerCUDN has name "zzz-newer-cudn" which comes after "aaa-older-cudn" alphabetically,
+			// but olderCUDN should still win because it was created first)
+			Eventually(func() int {
+				return c.vidAllocator.GetID("aaa-older-cudn/macvrf")
+			}).Should(Equal(42), "Older CUDN should keep VID 42")
+
+			// The newer CUDN loses the conflict and gets a new VID during reconciliation
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs2.Name).Get(context.Background(), newerCUDN.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "Newer CUDN should get new VID (first available) since older CUDN won VID 42")
+			}).Should(Succeed())
+		})
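
A plausible shape for the ordering rule this test asserts: recovery walks CUDNs oldest first, so the older network wins a contested VID deterministically across restarts. Sorting by creation timestamp is stated by the test; the name tiebreaker for equal timestamps is an assumption:

```go
package sketch

import (
	"sort"
	"time"
)

type cudnRef struct {
	name    string
	created time.Time
}

// sortForRecovery orders CUDNs so that ReserveID is attempted for the oldest
// network first; a younger network claiming the same VID then hits a conflict
// and is re-queued for a fresh allocation.
func sortForRecovery(cudns []cudnRef) {
	sort.Slice(cudns, func(i, j int) bool {
		if !cudns[i].created.Equal(cudns[j].created) {
			return cudns[i].created.Before(cudns[j].created)
		}
		return cudns[i].name < cudns[j].name // assumed tiebreaker
	})
}
```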
+
+		It("should return error when VID pool is exhausted", func() {
+			testNs := testNamespace("evpn-exhaustion-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-exhaust-cudn", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+
+			// Exhaust all available VIDs (2-4094) before starting the controller (0,1 already reserved)
+			for i := 2; i < MaxEVPNVIDs; i++ {
+				err := c.vidAllocator.ReserveID(fmt.Sprintf("exhaust-key-%d", i), i)
+				Expect(err).NotTo(HaveOccurred(), "should allocate VID %d", i)
+			}
+
+			// Now start the controller - the EVPN CUDN should fail to get a VID
+			Expect(c.Run()).To(Succeed())
+
+			// Verify the pool is exhausted
+			_, err := c.vidAllocator.AllocateID("one-more-key")
+			Expect(err).To(HaveOccurred(), "VID pool should be exhausted")
+
+			// The CUDN should report a sync error because VID allocation failed
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "False",
+				Reason:  "NetworkAttachmentDefinitionSyncError",
+				Message: "failed to allocate EVPN VIDs: failed to allocate VID for MAC-VRF: failed to allocate the id for the resource evpn-exhaust-cudn/macvrf",
+			}}), "should report VID allocation failure in status")
+
+			// Verify NAD was not created
+			_, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VID allocation fails")
+		})
+
+		It("should allocate VID after pool is freed up", func() {
+			testNs := testNamespace("evpn-free-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-free-cudn", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+
+			// Exhaust all VIDs except one (starting from 2, since 0,1 already reserved)
+			for i := 2; i < MaxEVPNVIDs-1; i++ {
+				err := c.vidAllocator.ReserveID(fmt.Sprintf("exhaust-key-%d", i), i)
+				Expect(err).NotTo(HaveOccurred())
+			}
+
+			// Start controller - it should successfully allocate the last available VID
+			Expect(c.Run()).To(Succeed())
+
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "True",
+				Reason:  "NetworkAttachmentDefinitionCreated",
+				Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-free-test]",
+			}}), "should successfully create network with last available VID")
+
+			// Verify the VID was allocated
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(MaxEVPNVIDs-1), "should get the last available VID")
+			}).Should(Succeed())
+		})
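
The constants in these loops are consistent with the 12-bit 802.1Q VID space: 0 and 1 are pre-reserved at startup and 4095 is excluded, which implies MaxEVPNVIDs = 4095 and a usable range of 2-4094 (so the "last available VID" is 4094). That value of MaxEVPNVIDs is an inference from the loops above, not a quoted constant; a sketch of the implied initialization, with hypothetical names:

```go
package sketch

// Inferred, not quoted: `for i := 2; i < MaxEVPNVIDs` exhausting "2-4094"
// only works out if MaxEVPNVIDs == 4095.
const MaxEVPNVIDs = 4095 // exclusive upper bound: usable VIDs are 2..4094

// reserveWellKnownVIDs mirrors the defensive startup check tested below:
// VID 0 (priority-tagged frames per IEEE 802.1Q) and VID 1 (conventional
// default VLAN) are taken out of the pool, and a failure to reserve VID 0
// aborts controller initialization.
func reserveWellKnownVIDs(reserve func(key string, id int) error) error {
	if err := reserve("reserved/vid-0", 0); err != nil {
		return err // surfaces as "failed to reserve VID 0"
	}
	return reserve("reserved/vid-1", 1)
}
```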
+
+		It("should fail to start if VID 0 is already reserved by another resource", func() {
+			// This tests the defensive check that VID 0 (reserved per IEEE 802.1Q)
+			// must be reservable during controller initialization.
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest)
+
+			// Reserve VID 0 with a DIFFERENT key (simulating corruption/bug)
+			Expect(c.vidAllocator.ReserveID("some-other-key", 0)).To(Succeed())
+
+			// Run should fail because initializeController can't reserve VID 0
+			err := c.Run()
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("failed to reserve VID 0"))
+		})
+
+		It("should allocate new VID when namespace and NAD are created at runtime", func() {
+			// Scenario: Allocator has no VID for this key, namespace/NAD created at runtime
+			// This can happen when:
+			// - CUDN exists but had no matching namespaces at startup (no NADs to recover)
+			// - Admin later creates a namespace
+			// - Controller reconciles and allocates a new VID
+			//
+			// 1. Controller starts with CUDN but NO matching namespaces (no NADs created)
+			// 2. Allocator has NO VID for this key after startup
+			// 3. Namespace is created at runtime
+			// 4. Controller reconciles and allocates VID 2 (first available, 0,1 reserved)
+			vtep := testVTEP("vtep-test")
+
+			// Namespace that doesn't exist at startup
+			const runtimeNsName = "runtime-ns-test"
+
+			// CUDN with selector matching a namespace that doesn't exist yet
+			cudn := testEVPNClusterUDN("evpn-runtime-cudn", vtep.Name, runtimeNsName)
+
+			// Start controller - no NADs to recover, allocator empty for this key
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Create namespace at runtime (NAD will be created by controller)
+			testNs := testNamespace(runtimeNsName)
+			_, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), testNs, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Controller reconciles and allocates VID 2 (first available, 0,1 reserved)
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "VID should be allocated (first available, 0,1 reserved)")
+			}).Should(Succeed())
+		})
+
+		It("should allocate new VID when existing NAD has VID taken by another CUDN", func() {
+			// Scenario: Allocator has no VID for this key, but NAD's VID is taken by another CUDN
+			// This can happen when:
+			// - CUDN-A had no matching namespaces at startup
+			// - CUDN-B had a NAD with VID 42 that was recovered
+			// - Someone manually creates NAD for CUDN-A with VID 42 (collision)
+			//
+			// 1. Controller starts with CUDN but NO matching namespaces
+			// 2. VID 42 is already reserved by a different CUDN
+			// 3. Namespace and NAD with VID 42 are created at runtime
+			// 4. Controller reconciles
+			// 5. VID 42 can't be reserved (taken) -> new VID allocated
+			vtep := testVTEP("vtep-test")
+
+			// Namespace that doesn't exist at startup
+			const runtimeNsName = "runtime-conflict-test"
+
+			cudn := testEVPNClusterUDN("evpn-runtime-conflict", vtep.Name, runtimeNsName)
+
+			// Start controller - no NADs to recover, allocator empty for this key
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, vtep)
+
+			// VID 42 is already reserved by another CUDN (simulates collision)
+			Expect(c.vidAllocator.ReserveID("another-cudn/macvrf", 42)).To(Succeed())
+
+			Expect(c.Run()).To(Succeed())
+
+			// Create namespace and NAD with VID 42 at runtime (collision with another CUDN)
+			testNs := testNamespace(runtimeNsName)
+			runtimeNAD := testEVPNClusterUdnNADOwnedByCUDN(cudn, testNs.Name, vtep.Name, 42, 0)
+
+			_, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), testNs, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			_, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Create(context.Background(), runtimeNAD, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Controller reconciles - VID 42 is taken, must allocate new VID
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "VID should be newly allocated since 42 is taken by another CUDN (first available)")
+			}).Should(Succeed())
+		})
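
The two runtime tests above and the manual-change test that follows are all consequences of one precedence rule during reconciliation: an ID already held in the allocator wins, a free NAD VID is adopted, and anything else gets a fresh allocation. A hedged sketch of that rule (ensureVID is a hypothetical name for this step, built on the allocator contract sketched earlier):

```go
package sketch

type allocator interface {
	GetID(key string) int
	ReserveID(key string, id int) error
	AllocateID(key string) (int, error)
}

// ensureVID decides which VID a NAD ends up with during reconciliation:
//   - allocator already holds one for the key -> that VID wins (a manual
//     NAD edit gets reverted to it),
//   - otherwise adopt the NAD's VID if it is still free,
//   - otherwise allocate the lowest free VID.
func ensureVID(alloc allocator, key string, nadVID int) (int, error) {
	if id := alloc.GetID(key); id >= 0 {
		return id, nil
	}
	if nadVID > 0 {
		if err := alloc.ReserveID(key, nadVID); err == nil {
			return nadVID, nil
		}
		// VID taken by another network: fall through to a fresh allocation
	}
	return alloc.AllocateID(key)
}
```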
+
+		It("should revert manual NAD VID change when allocator already has VID for this key", func() {
+			// This tests the case where:
+			// - Allocator has VID 2 for this key (from initial NAD creation, first available)
+			// - Someone manually changes NAD to VID 42
+			// - Allocator's VID 2 should win, NAD reverted to 2
+			// Note: Whether VID 42 is free or taken doesn't matter - the allocator's
+			// existing VID takes precedence because ReserveID fails when key already has a VID.
+			testNs := testNamespace("evpn-vid-manual-change-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-manual-change-cudn", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Wait for initial NAD creation (will get VID 2, first available)
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "Initial VID should be 2 (first available)")
+			}).Should(Succeed())
+
+			// Now manually update the NAD with VID 42
+			nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(err).NotTo(HaveOccurred())
+			Expect(setNADEVPNVIDs(nad, 42, 0)).To(Succeed())
+			_, err = cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Update(context.Background(), nad, metav1.UpdateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// The NAD update triggers reconciliation. The allocator already has VID 2
+			// for this key, so NAD is reverted to 2.
+			Eventually(func(g Gomega) {
+				nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				g.Expect(err).NotTo(HaveOccurred())
+				macVID, _ := evpnVIDsFromNAD(nad)
+				g.Expect(macVID).To(Equal(2), "VID should be reverted to allocator's VID")
+			}).Should(Succeed())
+		})
+
+		It("should report VTEPNotFound when EVPN CUDN references non-existent VTEP", func() {
+			testNs := testNamespace("evpn-vtep-missing-test")
+			cudn := testEVPNClusterUDN("evpn-vtep-missing", "default", testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs)
+			Expect(c.Run()).To(Succeed())
+
+			// CUDN should report VTEPNotFound status
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "False",
+				Reason:  "VTEPNotFound",
+				Message: "Cannot create network: VTEP 'default' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.",
+			}}), "should report VTEPNotFound in status")
+
+			// NAD should not be created when VTEP is missing
+			_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VTEP is missing")
+		})
+
+		It("should create NAD when VTEP exists for EVPN CUDN", func() {
+			testNs := testNamespace("evpn-vtep-exists-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-vtep-exists", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// CUDN should succeed when VTEP exists
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "True",
+				Reason:  "NetworkAttachmentDefinitionCreated",
+				Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-exists-test]",
+			}}), "should succeed when VTEP exists")
+
+			// NAD should be created
+			Eventually(func() error {
+				_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				return err
+			}).Should(Succeed(), "NAD should be created when VTEP exists")
+		})
+
+		It("should automatically reconcile CUDN when VTEP is created after CUDN", func() {
+			testNs := testNamespace("evpn-vtep-transition-test")
+			vtepName := "default"
+			cudn := testEVPNClusterUDN("evpn-vtep-transition", vtepName, testNs.Name)
+
+			// Start controller WITHOUT the VTEP - CUDN references non-existent VTEP
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs)
+			Expect(c.Run()).To(Succeed())
+
+			// Step 1: CUDN should initially report VTEPNotFound
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "False",
+				Reason:  "VTEPNotFound",
+				Message: "Cannot create network: VTEP '" + vtepName + "' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.",
+			}}), "should initially report VTEPNotFound")
+
+			// NAD should NOT exist yet
+			_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when VTEP is missing")
+
+			// Step 2: Create the VTEP dynamically - this should trigger VTEPNotifier
+			vtep := testVTEP(vtepName)
+			_, err = cs.VTEPClient.K8sV1().VTEPs().Create(context.Background(), vtep, metav1.CreateOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Step 3: CUDN should be automatically reconciled and succeed
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "True",
+				Reason:  "NetworkAttachmentDefinitionCreated",
+				Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-transition-test]",
+			}}), "should succeed after VTEP is created")
+
+			// NAD should now be created
+			Eventually(func() error {
+				_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				return err
+			}).Should(Succeed(), "NAD should be created after VTEP is created")
+		})
+
+		It("should only re-queue EVPN CUDNs when VTEP changes, not non-EVPN CUDNs", func() {
+			testNs := testNamespace("vtep-filter-test")
+			vtep := testVTEP("vtep-filter")
+
+			// Create a non-EVPN CUDN (Layer2 without EVPN transport)
+			nonEvpnCUDN := testClusterUDN("non-evpn-cudn", testNs.Name)
+			nonEvpnCUDN.UID = "non-evpn-uid"
+
+			// Create an EVPN CUDN that references the VTEP
+			evpnCUDN := testEVPNClusterUDN("evpn-cudn", vtep.Name, testNs.Name)
+			evpnCUDN.UID = "evpn-uid"
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, nonEvpnCUDN, evpnCUDN, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Wait for EVPN NAD to be created
+			Eventually(func() error {
+				_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), evpnCUDN.Name, metav1.GetOptions{})
+				return err
+			}).Should(Succeed())
+
+			// ReconcileVTEP should iterate over all CUDNs but only match the EVPN one
+			// This covers the non-EVPN path in cudnReferencesVTEP
+			err := c.ReconcileVTEP(vtep.Name)
+			Expect(err).NotTo(HaveOccurred())
+		})
+
+		It("should report VTEPNotFound when VTEP is deleted after CUDN creation", func() {
+			testNs := testNamespace("evpn-vtep-delete-test")
+			vtep := testVTEP("vtep-to-delete")
+			cudn := testEVPNClusterUDN("evpn-vtep-delete", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// Step 1: Verify NAD is created successfully when VTEP exists
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "True",
+				Reason:  "NetworkAttachmentDefinitionCreated",
+				Message: "NetworkAttachmentDefinition has been created in following namespaces: [evpn-vtep-delete-test]",
+			}}), "should initially succeed when VTEP exists")
+
+			Eventually(func() error {
+				_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				return err
+			}).Should(Succeed(), "NAD should be created when VTEP exists")
+
+			// Step 2: Delete the VTEP - this should trigger VTEPNotifier
+			err := cs.VTEPClient.K8sV1().VTEPs().Delete(context.Background(), vtep.Name, metav1.DeleteOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Step 3: CUDN should be re-reconciled and report VTEPNotFound
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "False",
+				Reason:  "VTEPNotFound",
+				Message: "Cannot create network: VTEP '" + vtep.Name + "' does not exist. Create the VTEP CR first or update the CUDN to reference an existing VTEP.",
+			}}), "should report VTEPNotFound after VTEP is deleted")
+		})
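
The VTEP lifecycle tests above all reduce to a single gate in the sync path: render the NAD only when the referenced VTEP CR exists, otherwise surface the VTEPNotFound message verbatim and skip NAD creation. A sketch of that check (the interface and function names are illustrative, not the controller's actual code):

```go
package sketch

import "fmt"

// vtepGetter abstracts the VTEP lister/client lookup.
type vtepGetter interface {
	Exists(name string) (bool, error)
}

// checkVTEP returns the exact user-facing error the status conditions carry
// when the referenced VTEP CR is absent; a nil return lets rendering proceed.
func checkVTEP(vteps vtepGetter, vtepName string) error {
	exists, err := vteps.Exists(vtepName)
	if err != nil {
		return err
	}
	if !exists {
		return fmt.Errorf("Cannot create network: VTEP '%s' does not exist. "+
			"Create the VTEP CR first or update the CUDN to reference an existing VTEP.", vtepName)
	}
	return nil
}
```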
+
+		It("should fail when EVPN transport is requested but EVPN feature is disabled", func() {
+			// Disable EVPN feature flag for this test.
+			// No defer needed - BeforeEach resets config via PrepareTestConfig().
+			config.OVNKubernetesFeature.EnableEVPN = false
+
+			testNs := testNamespace("evpn-disabled-test")
+			vtep := testVTEP("vtep-test")
+			cudn := testEVPNClusterUDN("evpn-disabled-cudn", vtep.Name, testNs.Name)
+
+			c = newTestControllerWithNetworkManager(template.RenderNetAttachDefManifest, cudn, testNs, vtep)
+			Expect(c.Run()).To(Succeed())
+
+			// CUDN should report error with message about EVPN flag
+			Eventually(func() []metav1.Condition {
+				cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudn.Name, metav1.GetOptions{})
+				Expect(err).NotTo(HaveOccurred())
+				return normalizeConditions(cudn.Status.Conditions)
+			}).Should(Equal([]metav1.Condition{{
+				Type:    "NetworkCreated",
+				Status:  "False",
+				Reason:  "NetworkAttachmentDefinitionSyncError",
+				Message: "EVPN transport requested but EVPN feature is not enabled",
+			}}), "should report error when EVPN flag is disabled")
+
+			// NAD should not be created when EVPN is disabled
+			_, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudn.Name, metav1.GetOptions{})
+			Expect(apierrors.IsNotFound(err)).To(BeTrue(), "NAD should not be created when EVPN is disabled")
+		})
 		It("should update NAD annotations and preserve internal OVNK annotations on UDN update", func() {
 			testNamespaces := []string{"red", "blue"}
 			var objs []runtime.Object
@@ -1604,7 +2460,208 @@ func failRenderNadStub(err error) RenderNetAttachDefManifest {
 }
 
 func newRenderNadStub(nad *netv1.NetworkAttachmentDefinition, err error) RenderNetAttachDefManifest {
-	return func(client.Object, string) (*netv1.NetworkAttachmentDefinition, error) {
+	return func(client.Object, string, ...template.RenderOption) (*netv1.NetworkAttachmentDefinition, error) {
 		return nad, err
 	}
 }
+
+func testEVPNClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork {
+	return &udnv1.ClusterUserDefinedNetwork{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels:     map[string]string{"k8s.ovn.org/user-defined-network": ""},
+			Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"},
+			Name:       name,
+			UID:        "1",
+		},
+		Spec: udnv1.ClusterUserDefinedNetworkSpec{
+			NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{
+				{
+					Key:      corev1.LabelMetadataName,
+					Operator: metav1.LabelSelectorOpIn,
+					Values:   targetNamespaces,
+				},
+			}},
+			Network: udnv1.NetworkSpec{
+				Topology: udnv1.NetworkTopologyLayer2,
+				Layer2: &udnv1.Layer2Config{
+					Role:    udnv1.NetworkRoleSecondary,
+					Subnets: udnv1.DualStackCIDRs{"10.10.10.0/24"},
+				},
+				Transport: udnv1.TransportOptionEVPN,
+				EVPN: &udnv1.EVPNConfig{
+					VTEP: vtepName,
+					MACVRF: &udnv1.VRFConfig{
+						VNI: 100,
+					},
+				},
+			},
+		},
+	}
+}
+
+// testEVPNClusterUdnNADWithVIDs creates an EVPN NAD with specific MAC-VRF and IP-VRF VIDs.
+// Pass 0 for ipVID to create a MAC-VRF only NAD.
+func testEVPNClusterUdnNADWithVIDs(name, namespace, vtepName string, macVID, ipVID int) *netv1.NetworkAttachmentDefinition {
+	nad := testClusterUdnNAD(name, namespace)
+	if ipVID > 0 {
+		// Symmetric IRB (both MAC-VRF and IP-VRF)
+		nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID)
+	} else {
+		// MAC-VRF only
+		nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID)
+	}
+	return nad
+}
+
+// testEVPNClusterUdnNADOwnedByCUDN creates an EVPN NAD with specific VIDs and sets up
+// the OwnerReferences to indicate ownership by the given CUDN.
+func testEVPNClusterUdnNADOwnedByCUDN(cudn *udnv1.ClusterUserDefinedNetwork, namespace, vtepName string, macVID, ipVID int) *netv1.NetworkAttachmentDefinition {
+	nad := testEVPNClusterUdnNADWithVIDs(cudn.Name, namespace, vtepName, macVID, ipVID)
+	nad.OwnerReferences = []metav1.OwnerReference{
+		{
+			APIVersion:         "k8s.ovn.org/v1",
+			Kind:               "ClusterUserDefinedNetwork",
+			Name:               cudn.Name,
+			UID:                cudn.UID,
+			Controller:         ptr.To(true),
+			BlockOwnerDeletion: ptr.To(true),
+		},
+	}
+	return nad
+}
+
+func testSymmetricIRBClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork {
+	return &udnv1.ClusterUserDefinedNetwork{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels:     map[string]string{"k8s.ovn.org/user-defined-network": ""},
+			Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"},
+			Name:       name,
+			UID:        "1",
+		},
+		Spec: udnv1.ClusterUserDefinedNetworkSpec{
+			NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{
+				{
+					Key:      corev1.LabelMetadataName,
+					Operator: metav1.LabelSelectorOpIn,
+					Values:   targetNamespaces,
+				},
+			}},
+			Network: udnv1.NetworkSpec{
+				Topology: udnv1.NetworkTopologyLayer2,
+				Layer2: &udnv1.Layer2Config{
+					Role:    udnv1.NetworkRoleSecondary,
+					Subnets: udnv1.DualStackCIDRs{"10.10.10.0/24"},
+				},
+				Transport: udnv1.TransportOptionEVPN,
+				EVPN: &udnv1.EVPNConfig{
+					VTEP: vtepName,
+					MACVRF: &udnv1.VRFConfig{
+						VNI: 100,
+					},
+					IPVRF: &udnv1.VRFConfig{
+						VNI: 200,
+					},
+				},
+			},
+		},
+	}
+}
+
+func testEVPNIPVRFClusterUDN(name string, vtepName string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork {
+	return &udnv1.ClusterUserDefinedNetwork{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels:     map[string]string{"k8s.ovn.org/user-defined-network": ""},
+			Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"},
+			Name:       name,
+			UID:        "1",
+		},
+		Spec: udnv1.ClusterUserDefinedNetworkSpec{
+			NamespaceSelector: metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{
+				{
+					Key:      corev1.LabelMetadataName,
+					Operator: metav1.LabelSelectorOpIn,
+					Values:   targetNamespaces,
+				},
+			}},
+			Network: udnv1.NetworkSpec{
+				Topology: udnv1.NetworkTopologyLayer3,
+				Layer3: &udnv1.Layer3Config{
+					Role: udnv1.NetworkRoleSecondary,
+				},
+				Transport: udnv1.TransportOptionEVPN,
+				EVPN: &udnv1.EVPNConfig{
+					VTEP: vtepName,
+					IPVRF: &udnv1.VRFConfig{
+						VNI: 200,
+					},
+				},
+			},
+		},
+	}
+}
+
+func testVTEP(name string) *vtepv1.VTEP {
+	return &vtepv1.VTEP{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+			UID:  types.UID("vtep-" + name),
+		},
+		Spec: vtepv1.VTEPSpec{
+			CIDRs: vtepv1.DualStackCIDRs{"100.64.0.0/24"},
+			Mode:  vtepv1.VTEPModeManaged,
+		},
+	}
+}
+
+// evpnVIDsFromNAD extracts MAC-VRF and IP-VRF VIDs from a NAD config.
+// Returns (macVID, ipVID) where 0 indicates the VRF is not present or has no VID.
+func evpnVIDsFromNAD(nad *netv1.NetworkAttachmentDefinition) (macVID, ipVID int) {
+	if nad == nil {
+		return 0, 0
+	}
+	var netConf ovncnitypes.NetConf
+	if err := json.Unmarshal([]byte(nad.Spec.Config), &netConf); err != nil {
+		return 0, 0
+	}
+	if netConf.EVPN == nil {
+		return 0, 0
+	}
+	if netConf.EVPN.MACVRF != nil {
+		macVID = netConf.EVPN.MACVRF.VID
+	}
+	if netConf.EVPN.IPVRF != nil {
+		ipVID = netConf.EVPN.IPVRF.VID
+	}
+	return macVID, ipVID
+}
+
+// setNADEVPNVIDs modifies the MAC-VRF and/or IP-VRF VIDs in a NAD config.
+// Pass 0 to leave a VID unchanged. This is used in tests to set specific VIDs
+// without rewriting the entire config.
+func setNADEVPNVIDs(nad *netv1.NetworkAttachmentDefinition, macVID, ipVID int) error {
+	var netConf ovncnitypes.NetConf
+	if err := json.Unmarshal([]byte(nad.Spec.Config), &netConf); err != nil {
+		return err
+	}
+	if netConf.EVPN == nil {
+		return fmt.Errorf("NAD has no EVPN config")
+	}
+	if macVID > 0 {
+		if netConf.EVPN.MACVRF == nil {
+			return fmt.Errorf("NAD has no EVPN MAC-VRF config")
+		}
+		netConf.EVPN.MACVRF.VID = macVID
+	}
+	if ipVID > 0 {
+		if netConf.EVPN.IPVRF == nil {
+			return fmt.Errorf("NAD has no EVPN IP-VRF config")
+		}
+		netConf.EVPN.IPVRF.VID = ipVID
+	}
+	configBytes, err := json.Marshal(netConf)
+	if err != nil {
+		return err
+	}
+	nad.Spec.Config = string(configBytes)
+	return nil
+}
diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go
index afe0d93c03..50ed844a34 100644
--- a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go
+++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace_test.go
@@ -15,6 +15,7 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller"
 	udnv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake"
+	vtepv1fake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
@@ -40,6 +41,7 @@ var _ = Describe("NamespaceNotifier", func() {
 			KubeClient:               kubeClient,
 			NetworkAttchDefClient:    netv1fake.NewSimpleClientset(),
 			UserDefinedNetworkClient: udnv1fake.NewSimpleClientset(),
+			VTEPClient:               vtepv1fake.NewSimpleClientset(),
 		}
 		var err error
 		wf, err = factory.NewClusterManagerWatchFactory(fakeClient)
diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go
new file mode 100644
index 0000000000..75eb8d7fcb
--- /dev/null
+++ b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/vtep.go
@@ -0,0 +1,70 @@
+package notifier
+
+import (
+	"errors"
+
+	"k8s.io/client-go/util/workqueue"
+
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller"
+	vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1"
+	vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1"
+)
+
+// VTEPReconciler is the interface for controllers that need to react to VTEP events.
+type VTEPReconciler interface {
+	ReconcileVTEP(key string) error
+}
+
+// VTEPNotifier watches VTEP objects and notifies subscribers upon change.
+// It enqueues the reconciled object keys in the subscribing controllers' workqueues.
+type VTEPNotifier struct {
+	Controller controller.Controller
+
+	subscribers []VTEPReconciler
+}
+
+// NewVTEPNotifier creates a new VTEPNotifier that watches VTEP CRs and notifies subscribers.
+func NewVTEPNotifier(vtepInformer vtepinformer.VTEPInformer, subscribers ...VTEPReconciler) *VTEPNotifier {
+	c := &VTEPNotifier{
+		subscribers: subscribers,
+	}
+
+	vtepLister := vtepInformer.Lister()
+	cfg := &controller.ControllerConfig[vtepv1.VTEP]{
+		RateLimiter:    workqueue.DefaultTypedControllerRateLimiter[string](),
+		Reconcile:      c.reconcile,
+		ObjNeedsUpdate: c.needUpdate,
+		Threadiness:    1,
+		Informer:       vtepInformer.Informer(),
+		Lister:         vtepLister.List,
+	}
+	c.Controller = controller.NewController("udn-vtep-controller", cfg)
+
+	return c
+}
+
+// needUpdate returns true when the VTEP has been created or deleted.
+// We notify on create/delete so that CUDNs referencing this VTEP can be re-queued.
+// IMPORTANT: Before adding update notifications, verify that all subscribers
+// can handle increased event frequency.
+func (c *VTEPNotifier) needUpdate(old, new *vtepv1.VTEP) bool {
+	vtepCreated := old == nil && new != nil
+	vtepDeleted := old != nil && new == nil
+	return vtepCreated || vtepDeleted
+}
+
+// reconcile notifies subscribers with the VTEP key following VTEP events.
+func (c *VTEPNotifier) reconcile(key string) error {
+	var errs []error
+	for _, subscriber := range c.subscribers {
+		if subscriber != nil {
+			// enqueue the reconciled VTEP key in the subscriber's workqueue to
+			// enable the subscriber to act on VTEP changes
+			if err := subscriber.ReconcileVTEP(key); err != nil {
+				errs = append(errs, err)
+			}
+		}
+	}
+
+	return errors.Join(errs...)
+}
"github.com/onsi/gomega" +) + +var _ = Describe("VTEPNotifier", func() { + var ( + vtepClient *vtepv1fake.Clientset + wf *factory.WatchFactory + testVTEPNotifier *VTEPNotifier + ) + + BeforeEach(func() { + vtepClient = vtepv1fake.NewSimpleClientset() + + // enable features to make watch-factory start the VTEP informer + Expect(config.PrepareTestConfig()).To(Succeed()) + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + config.OVNKubernetesFeature.EnableRouteAdvertisements = true + config.OVNKubernetesFeature.EnableEVPN = true + fakeClient := &util.OVNClusterManagerClientset{ + KubeClient: fake.NewSimpleClientset(), + NetworkAttchDefClient: netv1fake.NewSimpleClientset(), + UserDefinedNetworkClient: udnv1fake.NewSimpleClientset(), + RouteAdvertisementsClient: rafake.NewSimpleClientset(), + FRRClient: frrfake.NewSimpleClientset(), + VTEPClient: vtepClient, + } + var err error + wf, err = factory.NewClusterManagerWatchFactory(fakeClient) + Expect(err).NotTo(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + }) + + AfterEach(func() { + wf.Shutdown() + }) + + var s *testVTEPSubscriber + + BeforeEach(func() { + s = &testVTEPSubscriber{reconciledKeys: map[string]int64{}} + testVTEPNotifier = NewVTEPNotifier(wf.VTEPInformer(), s) + Expect(controller.Start(testVTEPNotifier.Controller)).Should(Succeed()) + + // create test VTEPs + for i := 0; i < 3; i++ { + vtepName := "test-vtep-" + strconv.Itoa(i) + _, err := vtepClient.K8sV1().VTEPs().Create(context.Background(), testVTEP(vtepName), metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + }) + + AfterEach(func() { + if testVTEPNotifier != nil { + controller.Stop(testVTEPNotifier.Controller) + } + }) + + It("should notify VTEP create events", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + }) + + It("should notify VTEP delete events", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + + Expect(vtepClient.K8sV1().VTEPs().Delete(context.Background(), "test-vtep-2", metav1.DeleteOptions{})).To(Succeed()) + Expect(vtepClient.K8sV1().VTEPs().Delete(context.Background(), "test-vtep-0", metav1.DeleteOptions{})).To(Succeed()) + + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 2, + "test-vtep-1": 1, + "test-vtep-2": 2, + }), "should record additional two events, following VTEP deletion") + }) + + It("should NOT notify VTEP update events (spec/status changes)", func() { + Eventually(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + "test-vtep-2": 1, + })) + + // Update VTEP spec (change CIDRs) + vtep, err := vtepClient.K8sV1().VTEPs().Get(context.Background(), "test-vtep-1", metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + vtep.Spec.CIDRs = vtepv1.DualStackCIDRs{"192.168.0.0/24"} + _, err = vtepClient.K8sV1().VTEPs().Update(context.Background(), vtep, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Updates should NOT trigger notification (needUpdate returns false for updates) + Consistently(func() map[string]int64 { + return s.GetReconciledKeys() + }).Should(Equal(map[string]int64{ + "test-vtep-0": 1, + "test-vtep-1": 1, + 
"test-vtep-2": 1, + }), "should NOT record additional events following VTEP update") + }) + + It("should notify multiple subscribers", func() { + // Stop the single-subscriber notifier + controller.Stop(testVTEPNotifier.Controller) + + // Create a second subscriber + s2 := &testVTEPSubscriber{reconciledKeys: map[string]int64{}} + + // Create a new notifier with multiple subscribers + testVTEPNotifier = NewVTEPNotifier(wf.VTEPInformer(), s, s2) + Expect(controller.Start(testVTEPNotifier.Controller)).Should(Succeed()) + + // Create a new VTEP + _, err := vtepClient.K8sV1().VTEPs().Create(context.Background(), testVTEP("test-vtep-new"), metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Both subscribers should be notified exactly once + Eventually(func(g Gomega) { + keys1 := s.GetReconciledKeys() + keys2 := s2.GetReconciledKeys() + g.Expect(keys1["test-vtep-new"]).To(BeEquivalentTo(1), "subscriber 1 should be notified exactly once") + g.Expect(keys2["test-vtep-new"]).To(BeEquivalentTo(1), "subscriber 2 should be notified exactly once") + }).Should(Succeed()) + }) +}) + +func testVTEP(name string) *vtepv1.VTEP { + return &vtepv1.VTEP{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: vtepv1.VTEPSpec{ + CIDRs: vtepv1.DualStackCIDRs{"10.10.10.0/24"}, + Mode: vtepv1.VTEPModeManaged, + }, + } +} + +type testVTEPSubscriber struct { + err error + reconciledKeys map[string]int64 + lock sync.RWMutex +} + +func (s *testVTEPSubscriber) ReconcileVTEP(key string) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.reconciledKeys[key]++ + return s.err +} + +func (s *testVTEPSubscriber) GetReconciledKeys() map[string]int64 { + s.lock.RLock() + defer s.lock.RUnlock() + + cp := map[string]int64{} + maps.Copy(cp, s.reconciledKeys) + return cp +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index e451ed3923..62850d4a23 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -33,15 +33,17 @@ type SpecGetter interface { GetLayer3() *userdefinednetworkv1.Layer3Config GetLayer2() *userdefinednetworkv1.Layer2Config GetLocalnet() *userdefinednetworkv1.LocalnetConfig + GetTransport() userdefinednetworkv1.TransportOption + GetEVPN() *userdefinednetworkv1.EVPNConfig } -func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*netv1.NetworkAttachmentDefinition, error) { +func RenderNetAttachDefManifest(obj client.Object, targetNamespace string, opts ...RenderOption) (*netv1.NetworkAttachmentDefinition, error) { if obj == nil { return nil, nil } if targetNamespace == "" { - return nil, fmt.Errorf("namspace should not be empty") + return nil, fmt.Errorf("namespace should not be empty") } var ownerRef metav1.OwnerReference @@ -62,7 +64,7 @@ func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*net nadName := util.GetNADName(targetNamespace, obj.GetName()) - nadSpec, err := RenderNADSpec(networkName, nadName, spec) + nadSpec, err := renderNADSpec(networkName, nadName, spec, applyOptions(opts)) if err != nil { return nil, err } @@ -79,12 +81,12 @@ func RenderNetAttachDefManifest(obj client.Object, targetNamespace string) (*net }, nil } -func RenderNADSpec(networkName, nadName string, spec SpecGetter) (*netv1.NetworkAttachmentDefinitionSpec, error) { +func 
renderNADSpec(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (*netv1.NetworkAttachmentDefinitionSpec, error) { if err := validateTopology(spec); err != nil { return nil, fmt.Errorf("invalid topology specified: %w", err) } - cniNetConf, err := renderCNINetworkConfig(networkName, nadName, spec) + cniNetConf, err := renderCNINetworkConfig(networkName, nadName, spec, opts) if err != nil { return nil, fmt.Errorf("failed to render CNI network config: %w", err) } @@ -98,7 +100,7 @@ func RenderNADSpec(networkName, nadName string, spec SpecGetter) (*netv1.Network }, nil } -// renderNADLabels copies labels from UDN to help RenderNADSpec +// renderNADLabels copies labels from UDN to help renderNADSpec // function add those labels to corresponding NAD func renderNADLabels(obj client.Object) map[string]string { labels := make(map[string]string) @@ -134,15 +136,16 @@ func validateTopology(spec SpecGetter) error { return nil } -func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[string]interface{}, error) { +func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (map[string]interface{}, error) { netConfSpec := &ovncnitypes.NetConf{ NetConf: cnitypes.NetConf{ CNIVersion: cniVersion, Type: OvnK8sCNIOverlay, Name: networkName, }, - NADName: nadName, - Topology: strings.ToLower(string(spec.GetTopology())), + NADName: nadName, + Topology: strings.ToLower(string(spec.GetTopology())), + Transport: transportFromCRD(string(spec.GetTransport())), } switch spec.GetTopology() { @@ -194,6 +197,14 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s netConfSpec.VLANID = int(cfg.VLAN.Access.ID) } } + + if spec.GetTransport() == userdefinednetworkv1.TransportOptionEVPN { + if !util.IsEVPNEnabled() { + return nil, fmt.Errorf("EVPN transport requested but EVPN feature is not enabled") + } + netConfSpec.EVPN = renderEVPNConfig(spec, opts) + } + if netConfSpec.AllowPersistentIPs && !config.OVNKubernetesFeature.EnablePersistentIPs { return nil, fmt.Errorf("allowPersistentIPs is set but persistentIPs is Disabled") } @@ -256,9 +267,33 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s cniNetConf["defaultGatewayIPs"] = netConfSpec.DefaultGatewayIPs } } + + if netConfSpec.Transport != "" { + cniNetConf["transport"] = netConfSpec.Transport + } + if netConfSpec.EVPN != nil { + cniNetConf["evpn"] = netConfSpec.EVPN + } + return cniNetConf, nil } +// transportFromCRD converts CRD PascalCase format to canonical format. +// CRD format uses PascalCase: "Geneve", "NoOverlay", "EVPN" +// Returns canonical lowercase format: "geneve", "no-overlay", "evpn" +func transportFromCRD(crdTransport string) string { + switch crdTransport { + case "Geneve": + return types.NetworkTransportGeneve + case "NoOverlay": + return types.NetworkTransportNoOverlay + case "EVPN": + return types.NetworkTransportEVPN + default: + return crdTransport // Return as-is for validation to catch + } +} + func localnetMTU(desiredMTU int32) int { // The MTU for localnet topology should be as the default MTU (1500) because the underlay // is not part of the SDN and compensating for the SDN overhead (100) is not required. @@ -332,6 +367,36 @@ func ipString(ips userdefinednetworkv1.DualStackIPs) string { return strings.Join(ipStrings, ",") } +// renderEVPNConfig converts the EVPN configuration from the spec into the CNI EVPNConfig format. +// Note: evpnCfg is guaranteed to be non-nil by CEL validation on the CRD. 
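The rendering code above consumes *RenderOptions built by applyOptions(opts), and renderEVPNConfig below dereferences opts.EVPNVIDs.MACVRFVID/IPVRFVID, but the file defining those types is not part of this diff. A plausible functional-options shape, offered as an assumption consistent with the call sites rather than the repository's actual file:

```go
// Assumed definitions; field names MACVRFVID/IPVRFVID are taken from the
// dereferences in renderEVPNConfig, everything else is inferred.
package template

// EVPNVIDs carries allocator-assigned VLAN IDs into rendering.
type EVPNVIDs struct {
	MACVRFVID int
	IPVRFVID  int
}

// RenderOptions aggregates optional rendering inputs.
type RenderOptions struct {
	EVPNVIDs *EVPNVIDs
}

// RenderOption mutates RenderOptions.
type RenderOption func(*RenderOptions)

// WithEVPNVIDs injects the allocated MAC-VRF and IP-VRF VIDs; zero means
// "do not inject", matching the VID > 0 guards in renderEVPNConfig.
func WithEVPNVIDs(macVID, ipVID int) RenderOption {
	return func(o *RenderOptions) {
		o.EVPNVIDs = &EVPNVIDs{MACVRFVID: macVID, IPVRFVID: ipVID}
	}
}

// applyOptions folds options into a single struct; nil is returned when no
// options were given, which is why renderEVPNConfig nil-checks opts.
func applyOptions(opts []RenderOption) *RenderOptions {
	if len(opts) == 0 {
		return nil
	}
	o := &RenderOptions{}
	for _, opt := range opts {
		opt(o)
	}
	return o
}
```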
+// renderEVPNConfig converts the EVPN configuration from the spec into the CNI EVPNConfig format.
+// Note: evpnCfg is guaranteed to be non-nil by CEL validation on the CRD.
+func renderEVPNConfig(spec SpecGetter, opts *RenderOptions) *ovncnitypes.EVPNConfig {
+	evpnCfg := spec.GetEVPN()
+	evpnConfig := &ovncnitypes.EVPNConfig{
+		VTEP: evpnCfg.VTEP,
+	}
+
+	if evpnCfg.MACVRF != nil {
+		evpnConfig.MACVRF = &ovncnitypes.VRFConfig{
+			VNI:         evpnCfg.MACVRF.VNI,
+			RouteTarget: string(evpnCfg.MACVRF.RouteTarget),
+		}
+		if opts != nil && opts.EVPNVIDs != nil && opts.EVPNVIDs.MACVRFVID > 0 {
+			evpnConfig.MACVRF.VID = opts.EVPNVIDs.MACVRFVID
+		}
+	}
+	if evpnCfg.IPVRF != nil {
+		evpnConfig.IPVRF = &ovncnitypes.VRFConfig{
+			VNI:         evpnCfg.IPVRF.VNI,
+			RouteTarget: string(evpnCfg.IPVRF.RouteTarget),
+		}
+		if opts != nil && opts.EVPNVIDs != nil && opts.EVPNVIDs.IPVRFVID > 0 {
+			evpnConfig.IPVRF.VID = opts.EVPNVIDs.IPVRFVID
+		}
+	}
+
+	return evpnConfig
+}
+
 func GetSpec(obj client.Object) SpecGetter {
 	switch o := obj.(type) {
 	case *userdefinednetworkv1.UserDefinedNetwork:
diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go
index e44cee4366..5881617c6b 100644
--- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go
+++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go
@@ -1,6 +1,7 @@
 package template
 
 import (
+	"encoding/json"
 	"strings"
 
 	netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
@@ -21,15 +22,20 @@ import (
 
 var _ = Describe("NetAttachDefTemplate", func() {
 
-	// before each test, set the IPv4Mode and IPv6Mode to true
 	BeforeEach(func() {
+		// Restore global default values before each testcase
+		Expect(config.PrepareTestConfig()).To(Succeed())
 		config.IPv4Mode = true
 		config.IPv6Mode = true
+		// Enable EVPN for tests that use EVPN transport
+		config.OVNKubernetesFeature.EnableMultiNetwork = true
+		config.OVNKubernetesFeature.EnableRouteAdvertisements = true
+		config.OVNKubernetesFeature.EnableEVPN = true
 	})
 
 	DescribeTable("should fail to render NAD spec given",
 		func(spec *udnv1.UserDefinedNetworkSpec, expectedError string) {
-			_, err := RenderNADSpec("foo", "bar", spec)
+			_, err := renderNADSpec("foo", "bar", spec, nil)
 			Expect(err).To(MatchError(ContainSubstring(expectedError)))
 		},
 		Entry("invalid layer2 subnets",
@@ -631,8 +637,342 @@ var _ = Describe("NetAttachDefTemplate", func() {
 				"allowPersistentIPs": true
 			}`,
 		),
+		Entry("primary network, layer2 with EVPN transport and MAC-VRF",
+			udnv1.NetworkSpec{
+				Topology: udnv1.NetworkTopologyLayer2,
+				Layer2: &udnv1.Layer2Config{
+					Role:    udnv1.NetworkRolePrimary,
+					Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"},
+					MTU:     1500,
+				},
+				Transport: udnv1.TransportOptionEVPN,
+				EVPN: &udnv1.EVPNConfig{
+					VTEP: "my-vtep",
+					MACVRF: &udnv1.VRFConfig{
+						VNI:         100,
+						RouteTarget: "65000:100",
+					},
+				},
+			},
+			`{
+				"cniVersion": "1.0.0",
+				"type": "ovn-k8s-cni-overlay",
+				"name": "cluster_udn_test-net",
+				"netAttachDefName": "mynamespace/test-net",
+				"role": "primary",
+				"topology": "layer2",
+				"joinSubnet": "100.65.0.0/16,fd99::/64",
+				"transitSubnet": "100.88.0.0/16",
+				"subnets": "192.168.100.0/24",
+				"mtu": 1500,
+				"transport": "evpn",
+				"evpn": {
+					"vtep": "my-vtep",
+					"macVRF": {
+						"vni": 100,
+						"routeTarget": "65000:100"
+					}
+				}
+			}`,
+		),
[]udnv1.Layer3Subnet{ + {CIDR: "192.168.100.0/16"}, + }, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "65000:200", + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer3", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "subnets": "192.168.100.0/16", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "ipVRF": { + "vni": 200, + "routeTarget": "65000:200" + } + } + }`, + ), + Entry("primary network, layer2 with EVPN transport, MAC-VRF and IP-VRF", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "100000:100", // 4-byte ASN format + }, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "192.168.1.1:200", // IPv4 format + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer2", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16", + "subnets": "192.168.100.0/24", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "macVRF": { + "vni": 100, + "routeTarget": "100000:100" + }, + "ipVRF": { + "vni": 200, + "routeTarget": "192.168.1.1:200" + } + } + }`, + ), + Entry("primary network, layer2 with EVPN transport, MAC-VRF with VNI only (no RouteTarget)", + udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + MTU: 1500, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + // RouteTarget intentionally omitted + }, + }, + }, + `{ + "cniVersion": "1.0.0", + "type": "ovn-k8s-cni-overlay", + "name": "cluster_udn_test-net", + "netAttachDefName": "mynamespace/test-net", + "role": "primary", + "topology": "layer2", + "joinSubnet": "100.65.0.0/16,fd99::/64", + "transitSubnet": "100.88.0.0/16", + "subnets": "192.168.100.0/24", + "mtu": 1500, + "transport": "evpn", + "evpn": { + "vtep": "my-vtep", + "macVRF": { + "vni": 100 + } + } + }`, + ), ) + Context("EVPN VID injection", func() { + It("should inject VIDs into EVPN config when provided via WithEVPNVIDs", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + IPVRF: &udnv1.VRFConfig{ + VNI: 200, + RouteTarget: "65000:200", + }, + }, + }, + }, + } + + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(12, 13)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = 
json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + Expect(netConf.EVPN).NotTo(BeNil(), "evpnConfig should be present") + Expect(netConf.EVPN.MACVRF).NotTo(BeNil(), "macVRF should be present") + Expect(netConf.EVPN.MACVRF.VID).To(Equal(12), "macVRF VID should be 12") + Expect(netConf.EVPN.IPVRF).NotTo(BeNil(), "ipVRF should be present") + Expect(netConf.EVPN.IPVRF.VID).To(Equal(13), "ipVRF VID should be 13") + }) + + It("should omit VID when zero (VID=0 not injected)", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-no-vid", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + }, + }, + }, + } + + // Pass VID=0 for both (should be omitted from JSON, unmarshals as zero value) + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(0, 0)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + Expect(netConf.EVPN).NotTo(BeNil(), "evpnConfig should be present") + Expect(netConf.EVPN.MACVRF).NotTo(BeNil(), "macVRF should be present") + Expect(netConf.EVPN.MACVRF.VID).To(Equal(0), "VID should be zero when not injected") + + // Also verify the raw JSON doesn't contain "vid" field (omitempty) + Expect(nad.Spec.Config).NotTo(ContainSubstring(`"vid"`), "vid field should be omitted from JSON when zero") + }) + + It("should omit empty RouteTarget in EVPN config", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-no-rt", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + // RouteTarget intentionally omitted (empty) + }, + }, + }, + }, + } + + nad, err := RenderNetAttachDefManifest(cudn, "test-ns", WithEVPNVIDs(5, 0)) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).NotTo(BeNil()) + + var netConf ovncnitypes.NetConf + err = json.Unmarshal([]byte(nad.Spec.Config), &netConf) + Expect(err).NotTo(HaveOccurred()) + + // RouteTarget should be empty (omitted in JSON, unmarshals as empty string) + Expect(netConf.EVPN.MACVRF.RouteTarget).To(BeEmpty(), "empty routeTarget should unmarshal as empty string") + + // Also verify the raw JSON doesn't contain "routeTarget" field + Expect(nad.Spec.Config).NotTo(ContainSubstring(`"routeTarget"`), "routeTarget should be omitted from JSON when empty") + + // VID should be present + Expect(netConf.EVPN.MACVRF.VID).To(Equal(5), "macVRF VID should be 5") + }) + + It("should handle nil RenderOption without panic", func() { + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-nil-option", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: 
udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.0.0/16"}, + }, + }, + }, + } + + // Pass nil option - should not panic + var nilOpt RenderOption + Expect(func() { + _, _ = RenderNetAttachDefManifest(cudn, "test-ns", nilOpt, WithEVPNVIDs(1, 2)) + }).NotTo(Panic()) + }) + + It("should fail when EVPN transport is requested but EVPN feature is disabled", func() { + // Disable EVPN feature flag for this test. + // No defer needed - BeforeEach resets config via PrepareTestConfig(). + config.OVNKubernetesFeature.EnableEVPN = false + + cudn := &udnv1.ClusterUserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{Name: "test-evpn-disabled", UID: "1"}, + Spec: udnv1.ClusterUserDefinedNetworkSpec{ + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24"}, + }, + Transport: udnv1.TransportOptionEVPN, + EVPN: &udnv1.EVPNConfig{ + VTEP: "my-vtep", + MACVRF: &udnv1.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + }, + }, + }, + }, + } + + _, err := RenderNetAttachDefManifest(cudn, "test-ns") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("EVPN transport requested but EVPN feature is not enabled")) + }) + }) + It("should correctly assign transit Subnets", func() { // check no overlap, use default values netConf := &ovncnitypes.NetConf{ diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go new file mode 100644 index 0000000000..7b93ef34fd --- /dev/null +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/render_options.go @@ -0,0 +1,41 @@ +package template + +// RenderOption is a functional option for configuring NAD rendering. +type RenderOption func(*RenderOptions) + +// RenderOptions contains optional configuration for NAD rendering. +type RenderOptions struct { + EVPNVIDs *EVPNVIDs +} + +// EVPNVIDs contains pre-allocated VLAN IDs for EVPN MAC-VRF and IP-VRF. +type EVPNVIDs struct { + // MACVRFVID is the VLAN ID for the MAC-VRF (Layer 2 EVPN). + // A value of 0 means no VID is allocated for MAC-VRF. + MACVRFVID int + // IPVRFVID is the VLAN ID for the IP-VRF (Layer 3 EVPN). + // A value of 0 means no VID is allocated for IP-VRF. + IPVRFVID int +} + +// WithEVPNVIDs returns a RenderOption that sets the EVPN VIDs for rendering. +func WithEVPNVIDs(macVRFVID, ipVRFVID int) RenderOption { + return func(opts *RenderOptions) { + opts.EVPNVIDs = &EVPNVIDs{ + MACVRFVID: macVRFVID, + IPVRFVID: ipVRFVID, + } + } +} + +// applyOptions applies the given functional options and returns the resulting RenderOptions. +// Nil options in the slice are safely skipped to prevent panics. +func applyOptions(opts []RenderOption) *RenderOptions { + options := &RenderOptions{} + for _, opt := range opts { + if opt != nil { + opt(options) + } + } + return options +} diff --git a/go-controller/pkg/cni/types/types.go b/go-controller/pkg/cni/types/types.go index 0963f76507..8f8007e1fb 100644 --- a/go-controller/pkg/cni/types/types.go +++ b/go-controller/pkg/cni/types/types.go @@ -81,7 +81,7 @@ type NetConf struct { PhysicalNetworkName string `json:"physicalNetworkName,omitempty"` // Transport describes the transport protocol for east-west traffic. - // Valid values are "nooverlay", "geneve", and "evpn". + // Valid values are "no-overlay", "geneve", and "evpn". // Defaults to "geneve". 
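+	// For example (illustrative, not from this patch): a rendered NAD config
+	// selecting routed east-west traffic carries "transport": "no-overlay",
+	// while omitting the field keeps the "geneve" default.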
Transport string `json:"transport,omitempty"` @@ -127,6 +127,9 @@ type VRFConfig struct { VNI int32 `json:"vni"` // RouteTarget is the BGP route target for this VRF. RouteTarget string `json:"routeTarget,omitempty"` + // VID is the VLAN ID used for local traffic segmentation on each node. + // Allocated cluster-wide by the UDN controller, one per VRF. + VID int `json:"vid,omitempty"` } // NetworkSelectionElement represents one element of the JSON format diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index c67738e9ec..55c985a187 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -1,6 +1,7 @@ package config import ( + "encoding/base64" "flag" "fmt" "net" @@ -101,6 +102,7 @@ var ( RawClusterSubnets: "10.128.0.0/14/23", Zone: types.OvnDefaultZone, RawUDNAllowedDefaultServices: "default/kubernetes,kube-system/kube-dns", + Transport: types.NetworkTransportGeneve, } // Logging holds logging-related parsed config file parameters and command-line overrides @@ -242,6 +244,14 @@ var ( V6TransitSubnet: "fd97::/64", } + // NoOverlay holds no-overlay mode configuration + NoOverlay = NoOverlayConfig{} + + // ManagedBGP holds managed BGP configuration + ManagedBGP = ManagedBGPConfig{ + ASNumber: 64512, // Default AS number + } + // Layer2UsesTransitRouter indicated whether the layer2 primary networks will use transit router. // It is a per-node setting and is also reflected in the node annotations. Layer2UsesTransitRouter bool @@ -253,6 +263,22 @@ const ( kubeServiceAccountFileCACert string = "ca.crt" ) +// No-overlay mode configuration option constants +const ( + // NoOverlayRoutingManaged indicates OVN-Kubernetes manages the routing + NoOverlayRoutingManaged string = "managed" + // NoOverlayRoutingUnmanaged indicates users manage the routing themselves + NoOverlayRoutingUnmanaged string = "unmanaged" + + // ManagedBGPTopologyFullMesh represents a full-mesh BGP topology + ManagedBGPTopologyFullMesh string = "full-mesh" + + // NoOverlaySNATEnabled enables SNAT for outbound traffic + NoOverlaySNATEnabled string = "enabled" + // NoOverlaySNATDisabled disables SNAT for outbound traffic + NoOverlaySNATDisabled string = "disabled" +) + // DefaultConfig holds parsed config file parameters and command-line overrides type DefaultConfig struct { // MTU value used for the overlay networks. @@ -336,6 +362,11 @@ type DefaultConfig struct { // UDNAllowedDefaultServices holds a list of namespaced names of // default cluster network services accessible from primary user-defined networks UDNAllowedDefaultServices []string + + // Transport specifies the transport technology used for the default network. + // Accepts: "geneve" or "no-overlay". + // Defaults to "geneve". + Transport string `gcfg:"transport"` } // LoggingConfig holds logging-related parsed config file parameters and command-line overrides @@ -407,6 +438,7 @@ type KubernetesConfig struct { CertDuration time.Duration `gcfg:"cert-duration"` Kubeconfig string `gcfg:"kubeconfig"` CACert string `gcfg:"cacert"` + CACertData string `gcfg:"cacert-data"` CAData []byte APIServer string `gcfg:"apiserver"` Token string `gcfg:"token"` @@ -620,6 +652,32 @@ type ClusterManagerConfig struct { V6TransitSubnet string `gcfg:"v6-transit-subnet"` } +// NoOverlayConfig holds configuration for no-overlay mode +type NoOverlayConfig struct { + // OutboundSNAT configures SNAT behavior for outbound traffic from pods on the default network. + // Supported values: "enabled" or "disabled". 
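+	// Set via the config file only, for example (illustrative sketch):
+	//
+	//	[no-overlay]
+	//	outbound-snat=enabled
+	//	routing=managed
+	//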
+	// Required when transport=no-overlay.
+	OutboundSNAT string `gcfg:"outbound-snat"`
+	// Routing configures whether the pod network routing configuration is managed by
+	// OVN-Kubernetes or users. Supported values: "managed" or "unmanaged".
+	// Required when transport=no-overlay.
+	Routing string `gcfg:"routing"`
+}
+
+// ManagedBGPConfig holds configuration for managed BGP
+type ManagedBGPConfig struct {
+	// ASNumber specifies the AS number to be used by the BGP speakers on each node for its
+	// default VRF when no-overlay networks are configured with managed routing.
+	// It is shared by both the cluster default network and CUDNs.
+	// Supports both 16-bit (1-65535) and 32-bit (1-4294967295) AS numbers.
+	// Optional. Defaults to 64512 if not specified.
+	ASNumber uint32 `gcfg:"as-number"`
+	// Topology configures the BGP peering topology when routing is managed.
+	// Supported values: "full-mesh".
+	// Required when transport=no-overlay and routing=managed.
+	Topology string `gcfg:"topology"`
+}
+
 // OvnDBScheme describes the OVN database connection transport method
 type OvnDBScheme string
@@ -651,6 +709,8 @@ type config struct {
 	OvnKubeNode    OvnKubeNodeConfig
 	ClusterManager ClusterManagerConfig
 	OvsPaths       OvsPathConfig
+	NoOverlay      NoOverlayConfig  `gcfg:"no-overlay"`
+	ManagedBGP     ManagedBGPConfig `gcfg:"bgp-managed"`
 }

 var (
@@ -671,6 +731,8 @@ var (
 	savedOvnKubeNode    OvnKubeNodeConfig
 	savedClusterManager ClusterManagerConfig
 	savedOvsPaths       OvsPathConfig
+	savedNoOverlay      NoOverlayConfig
+	savedManagedBGP     ManagedBGPConfig
 	// legacy service-cluster-ip-range CLI option
 	serviceClusterIPRange string
@@ -701,6 +763,8 @@ func init() {
 	savedOvnKubeNode = OvnKubeNode
 	savedClusterManager = ClusterManager
 	savedOvsPaths = OvsPaths
+	savedNoOverlay = NoOverlay
+	savedManagedBGP = ManagedBGP
 	cli.VersionPrinter = func(_ *cli.Context) {
 		fmt.Printf("Version: %s\n", Version)
 		fmt.Printf("Git commit: %s\n", Commit)
@@ -732,6 +796,8 @@ func PrepareTestConfig() error {
 	OvnKubeNode = savedOvnKubeNode
 	ClusterManager = savedClusterManager
 	OvsPaths = savedOvsPaths
+	NoOverlay = savedNoOverlay
+	ManagedBGP = savedManagedBGP
 	Kubernetes.DisableRequestedChassis = false
 	EnableMulticast = false
 	UnprivilegedMode = false
@@ -754,6 +820,7 @@ func PrepareTestConfig() error {
 	// Don't pick up defaults from the environment
 	os.Unsetenv("KUBECONFIG")
 	os.Unsetenv("K8S_CACERT")
+	os.Unsetenv("K8S_CACERT_DATA")
 	os.Unsetenv("K8S_APISERVER")
 	os.Unsetenv("K8S_TOKEN")
 	os.Unsetenv("K8S_TOKEN_FILE")
@@ -870,7 +937,7 @@ var CommonFlags = []cli.Flag{
 	},
 	&cli.StringFlag{
 		Name:        "encap-type",
-		Usage:       "The encapsulation protocol to use to transmit packets between hypervisors",
+		Usage:       "The encapsulation protocol used by OVN to transmit packets between hypervisors in overlay mode (geneve, vxlan, gre)",
 		Destination: &cliConfig.Default.EncapType,
 		Value:       Default.EncapType,
 	},
@@ -965,6 +1032,12 @@ var CommonFlags = []cli.Flag{
 		"it defaults to 24 if unspecified.",
 		Destination: &cliConfig.Default.RawClusterSubnets,
 	},
+	&cli.StringFlag{
+		Name:        "transport",
+		Value:       Default.Transport,
+		Usage:       "Transport technology used for the default network, defaults to geneve if unspecified (geneve, no-overlay)",
+		Destination: &cliConfig.Default.Transport,
+	},
 	&cli.BoolFlag{
 		Name:  "unprivileged-mode",
 		Usage: "Run ovnkube-node container in unprivileged mode.
Valid only with --init-node option.",
@@ -1310,6 +1383,11 @@ var K8sFlags = []cli.Flag{
 		Usage:       "the absolute path to the Kubernetes API CA certificate (not required if --k8s-kubeconfig is given)",
 		Destination: &cliConfig.Kubernetes.CACert,
 	},
+	&cli.StringFlag{
+		Name:        "k8s-cacert-data",
+		Usage:       "the Base64 encoded Kubernetes API CA certificate data (not required if --k8s-kubeconfig is given)",
+		Destination: &cliConfig.Kubernetes.CACertData,
+	},
 	&cli.StringFlag{
 		Name:        "k8s-token",
 		Usage:       "the Kubernetes API authentication token (not required if --k8s-kubeconfig is given)",
@@ -1876,8 +1954,46 @@ func setOVSExternalID(exec kexec.Interface, key, value string) error {
 	return nil
 }
+// reconcileKubernetesAuthFields ensures that if a config stage provides Token/TokenFile
+// or CACert/CACertData, any stale value for these fields set by a previous stage is cleared.
+// This is required since any combination of these fields could be set by any stage
+// and might get overwritten only partially.
+func reconcileKubernetesAuthFields(k *KubernetesConfig, override *KubernetesConfig) {
+	// If this stage provided either Token or TokenFile, clear the other field
+	// not provided by this stage.
+	overrideHasToken := override.Token != ""
+	overrideHasTokenFile := override.TokenFile != ""
+
+	if overrideHasToken || overrideHasTokenFile {
+		if !overrideHasToken {
+			k.Token = ""
+		}
+		if !overrideHasTokenFile {
+			k.TokenFile = ""
+		}
+	}
+
+	// If this stage provided either CACert or CACertData, clear the other field
+	// not provided by this stage.
+	overrideHasCACert := override.CACert != ""
+	overrideHasCACertData := override.CACertData != ""
+
+	if overrideHasCACert || overrideHasCACertData {
+		if !overrideHasCACert {
+			k.CACert = ""
+		}
+		if !overrideHasCACertData {
+			k.CACertData = ""
+		}
+	}
+}
+
 func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath string, defaults *Defaults) error {
-	// token adn ca.crt may be from files mounted in container.
+	// values for token, cacert, kubeconfig, api-server may be found in several places.
+	// Priority order (highest first): OVS config, command line options, config file,
+	// environment variables, service account files
+
+	// token and ca.crt may be from files mounted in container.
 	saConfig := savedKubernetes
 	if data, err := os.ReadFile(filepath.Join(saPath, kubeServiceAccountFileToken)); err == nil {
 		saConfig.Token = string(data)
@@ -1891,16 +2007,13 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin
 		return err
 	}
-	// values for token, cacert, kubeconfig, api-server may be found in several places.
- // Priority order (highest first): OVS config, command line options, config file, - // environment variables, service account files - envConfig := savedKubernetes envVarsMap := map[string]string{ "Kubeconfig": "KUBECONFIG", "BootstrapKubeconfig": "BOOTSTRAP_KUBECONFIG", "CertDir": "CERT_DIR", "CACert": "K8S_CACERT", + "CACertData": "K8S_CACERT_DATA", "APIServer": "K8S_APISERVER", "Token": "K8S_TOKEN", "TokenFile": "K8S_TOKEN_FILE", @@ -1915,16 +2028,19 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin if err := overrideFields(&Kubernetes, &envConfig, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &envConfig) // Copy config file values over default values if err := overrideFields(&Kubernetes, &file.Kubernetes, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &file.Kubernetes) // And CLI overrides over config file and default values if err := overrideFields(&Kubernetes, &cli.Kubernetes, &savedKubernetes); err != nil { return err } + reconcileKubernetesAuthFields(&Kubernetes, &cli.Kubernetes) // Grab default values from OVS external IDs if defaults.K8sAPIServer { @@ -1945,8 +2061,15 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin return fmt.Errorf("kubernetes kubeconfig file %q not found", Kubernetes.Kubeconfig) } - if Kubernetes.CACert != "" { - bytes, err := os.ReadFile(Kubernetes.CACert) + if Kubernetes.CACert != "" || Kubernetes.CACertData != "" { + var bytes []byte + var err error + if Kubernetes.CACert != "" { + bytes, err = os.ReadFile(Kubernetes.CACert) + } else { + bytes, err = base64.StdEncoding.DecodeString(Kubernetes.CACertData) + } + if err != nil { return err } @@ -2300,6 +2423,115 @@ func buildClusterManagerConfig(cli, file *config) error { return nil } +// buildNoOverlayConfig updates NoOverlay config from config file only +// NoOverlay configuration is only available in config file, not via CLI flags +func buildNoOverlayConfig(file *config) error { + // Copy config file values over default values + if err := overrideFields(&NoOverlay, &file.NoOverlay, &savedNoOverlay); err != nil { + return err + } + + return nil +} + +// validateNoOverlayConfig validates the no-overlay configuration +func validateNoOverlayConfig() error { + // Validate transport option + if Default.Transport != types.NetworkTransportGeneve && Default.Transport != types.NetworkTransportNoOverlay { + return fmt.Errorf("invalid transport %q: must be %q or %q", Default.Transport, types.NetworkTransportGeneve, types.NetworkTransportNoOverlay) + } + + // If transport is no-overlay, validate required no-overlay options + if Default.Transport == types.NetworkTransportNoOverlay { + if !OVNKubernetesFeature.EnableRouteAdvertisements { + return fmt.Errorf("enable-route-advertisements must be true when transport=%q", types.NetworkTransportNoOverlay) + } + if NoOverlay.OutboundSNAT == "" { + return fmt.Errorf("outbound-snat is required when transport=no-overlay") + } + if NoOverlay.OutboundSNAT != NoOverlaySNATEnabled && NoOverlay.OutboundSNAT != NoOverlaySNATDisabled { + return fmt.Errorf("invalid outbound-snat %q: must be %q or %q", NoOverlay.OutboundSNAT, NoOverlaySNATEnabled, NoOverlaySNATDisabled) + } + + if NoOverlay.Routing == "" { + return fmt.Errorf("routing is required when transport=no-overlay") + } + if NoOverlay.Routing != NoOverlayRoutingManaged && NoOverlay.Routing != NoOverlayRoutingUnmanaged { + return fmt.Errorf("invalid routing %q: must be %q or 
%q", NoOverlay.Routing, NoOverlayRoutingManaged, NoOverlayRoutingUnmanaged) + } + + // If routing is managed, topology is required + if NoOverlay.Routing == NoOverlayRoutingManaged { + if ManagedBGP.Topology == "" { + return fmt.Errorf("topology is required when routing=managed") + } + if ManagedBGP.Topology != ManagedBGPTopologyFullMesh { + return fmt.Errorf("invalid topology %q: must be %q", ManagedBGP.Topology, ManagedBGPTopologyFullMesh) + } + } + } else { + // Warn if no-overlay or BGP config is specified but transport is not no-overlay + if NoOverlay.OutboundSNAT != "" || NoOverlay.Routing != "" { + klog.Warningf("[no-overlay] configuration specified but transport is %q; configuration will be ignored", Default.Transport) + } + } + + return nil +} + +// validateConfig performs all configuration validations after configs are built and completed. +// This is the centralized place called after completeConfig() that orchestrates all validations. +func validateConfig() error { + // Validate managed BGP configuration + if err := validateManagedBGPConfig(); err != nil { + return err + } + + // Validate no-overlay/transport configuration + if err := validateNoOverlayConfig(); err != nil { + return err + } + + return nil +} + +// buildManagedBGPConfig updates managed BGP config from config file only +// ManagedBGP configuration is only available in config file, not via CLI flags +func buildManagedBGPConfig(file *config) error { + // Copy config file values over default values + if err := overrideFields(&ManagedBGP, &file.ManagedBGP, &savedManagedBGP); err != nil { + return err + } + + return nil +} + +// validateManagedBGPConfig validates the managed BGP configuration +func validateManagedBGPConfig() error { + // Validate AS number is in valid range + // Valid AS numbers: 1-4294967295 (32-bit) + // Reserved ranges: + // 0 - Reserved (RFC 7607) + // 23456 - AS_TRANS (RFC 6793) + // 65535 - Reserved (RFC 7300) + // 4294967295 - Reserved (RFC 7300) + + if ManagedBGP.ASNumber == 0 { + return fmt.Errorf("invalid as-number: 0 is reserved") + } + if ManagedBGP.ASNumber == 23456 { + return fmt.Errorf("invalid as-number: 23456 is reserved (AS_TRANS for 16-bit to 32-bit AS translation)") + } + if ManagedBGP.ASNumber == 65535 { + return fmt.Errorf("invalid as-number: 65535 is reserved") + } + if ManagedBGP.ASNumber == 4294967295 { + return fmt.Errorf("invalid as-number: 4294967295 is reserved") + } + + return nil +} + // completeClusterManagerConfig completes the ClusterManager config by parsing raw values // into their final form. func completeClusterManagerConfig(allSubnets *ConfigSubnets) error { @@ -2352,6 +2584,7 @@ func buildDefaultConfig(cli, file *config) error { if Default.Zone == "" { Default.Zone = types.OvnDefaultZone } + return nil } @@ -2432,6 +2665,7 @@ func stripTokenFromK8sConfig() KubernetesConfig { // Token and CAData are sensitive fields so stripping // them while logging. 
k8sConf.Token = "" + k8sConf.CACertData = "" k8sConf.CAData = []byte{} return k8sConf } @@ -2462,6 +2696,8 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d OvnKubeNode: savedOvnKubeNode, ClusterManager: savedClusterManager, OvsPaths: savedOvsPaths, + NoOverlay: savedNoOverlay, + ManagedBGP: savedManagedBGP, } configFile, configFileIsDefault = getConfigFilePath(ctx) @@ -2587,6 +2823,14 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d return "", err } + if err = buildNoOverlayConfig(&cfg); err != nil { + return "", err + } + + if err = buildManagedBGPConfig(&cfg); err != nil { + return "", err + } + tmpAuth, err := buildOvnAuth(exec, true, &cliConfig.OvnNorth, &cfg.OvnNorth, defaults.OvnNorthAddress) if err != nil { return "", err @@ -2603,6 +2847,11 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d return "", err } + // Perform cross-configuration validations + if err := validateConfig(); err != nil { + return "", err + } + klog.V(5).Infof("Features config: %+v", OVNKubernetesFeature) klog.V(5).Infof("Default config: %+v", Default) klog.V(5).Infof("Logging config: %+v", Logging) @@ -2617,6 +2866,8 @@ func initConfigWithPath(ctx *cli.Context, exec kexec.Interface, saPath string, d klog.V(5).Infof("Ovnkube Node config: %+v", OvnKubeNode) klog.V(5).Infof("Ovnkube Cluster Manager config: %+v", ClusterManager) klog.V(5).Infof("OVS Paths config: %+v", OvsPaths) + klog.V(5).Infof("No Overlay config: %+v", NoOverlay) + klog.V(5).Infof("Managed BGP config: %+v", ManagedBGP) return retConfigFile, nil } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 6127dff90e..2a108e39d6 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -2121,4 +2121,255 @@ udn-allowed-default-services= ns/svc, ns1/svc1 gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) }) + + Describe("No-Overlay Configuration", func() { + BeforeEach(func() { + err := PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Enable route advertisements - required for no-overlay transport + OVNKubernetesFeature.EnableRouteAdvertisements = true + }) + + It("validates transport option correctly", func() { + // Test valid geneve transport + Default.Transport = types.NetworkTransportGeneve + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid no-overlay transport with required options + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid transport + Default.Transport = "invalid-transport" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid transport")) + }) + + It("requires outbound-snat when transport is no-overlay", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = "" + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("outbound-snat is required")) + }) + + It("validates outbound-snat values", func() { + Default.Transport = 
types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + + // Test valid enable + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid disable + NoOverlay.OutboundSNAT = NoOverlaySNATDisabled + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + NoOverlay.OutboundSNAT = "maybe" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid outbound-snat")) + }) + + It("requires routing when transport is no-overlay", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = "" + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("routing is required")) + }) + + It("validates routing values", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + + // Test valid managed (requires topology) + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid unmanaged (topology not required) + NoOverlay.Routing = NoOverlayRoutingUnmanaged + ManagedBGP.Topology = "" + err = validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + NoOverlay.Routing = "automatic" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid routing")) + }) + + It("builds no-overlay config from file only", func() { + fileConfig := config{ + NoOverlay: NoOverlayConfig{ + OutboundSNAT: NoOverlaySNATEnabled, + Routing: NoOverlayRoutingManaged, + }, + ManagedBGP: ManagedBGPConfig{ + Topology: ManagedBGPTopologyFullMesh, + }, + } + err := buildNoOverlayConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + err = buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Config file values should be applied + gomega.Expect(NoOverlay.OutboundSNAT).To(gomega.Equal(NoOverlaySNATEnabled)) + gomega.Expect(NoOverlay.Routing).To(gomega.Equal(NoOverlayRoutingManaged)) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + }) + + It("requires topology when routing is managed", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.Topology = "" + err := validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("topology is required when routing=managed")) + }) + + It("validates topology values", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingManaged + + // Test valid full-mesh + ManagedBGP.Topology = ManagedBGPTopologyFullMesh + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test invalid value + ManagedBGP.Topology = "route-reflector" + err = validateNoOverlayConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + 
gomega.Expect(err.Error()).To(gomega.ContainSubstring("invalid topology")) + gomega.Expect(err.Error()).To(gomega.ContainSubstring(`must be "full-mesh"`)) + }) + + It("does not require topology when routing is unmanaged", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.OutboundSNAT = NoOverlaySNATEnabled + NoOverlay.Routing = NoOverlayRoutingUnmanaged + ManagedBGP.Topology = "" + err := validateNoOverlayConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) + + Describe("BGP Configuration", func() { + BeforeEach(func() { + err := PrepareTestConfig() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + It("parses BGP config from file with all fields set", func() { + fileConfig := config{ + ManagedBGP: ManagedBGPConfig{ + Topology: ManagedBGPTopologyFullMesh, + ASNumber: 64500, + }, + } + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64500))) + }) + + It("handles partial BGP config in file", func() { + fileConfig := config{ + ManagedBGP: savedManagedBGP, + } + fileConfig.ManagedBGP.Topology = ManagedBGPTopologyFullMesh + + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(ManagedBGP.Topology).To(gomega.Equal(ManagedBGPTopologyFullMesh)) + // ASNumber should retain default value from init + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64512))) + }) + + It("handles empty BGP config in file", func() { + fileConfig := config{ + ManagedBGP: savedManagedBGP, + } + err := buildManagedBGPConfig(&fileConfig) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Should retain default values without panicking + gomega.Expect(ManagedBGP.ASNumber).To(gomega.Equal(uint32(64512))) // default value + }) + + It("validates reserved AS number 0", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 0 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("0 is reserved")) + }) + + It("validates reserved AS number 23456 (AS_TRANS)", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 23456 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("23456 is reserved")) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("AS_TRANS")) + }) + + It("validates reserved AS number 65535", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 65535 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("65535 is reserved")) + }) + + It("validates reserved AS number 4294967295", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + ManagedBGP.ASNumber = 4294967295 + err := validateManagedBGPConfig() + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("4294967295 is reserved")) + }) + + It("accepts valid AS numbers", func() { + Default.Transport = types.NetworkTransportNoOverlay + NoOverlay.Routing = NoOverlayRoutingManaged + + // 
Test valid 16-bit AS number + ManagedBGP.ASNumber = 64500 + err := validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test default AS number + ManagedBGP.ASNumber = 64512 + err = validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Test valid 32-bit AS number + ManagedBGP.ASNumber = 100000 + err = validateManagedBGPConfig() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + }) }) diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/spec.go b/go-controller/pkg/crd/userdefinednetwork/v1/spec.go index cd65f08223..b4fc651575 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/spec.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/spec.go @@ -17,6 +17,16 @@ func (s *UserDefinedNetworkSpec) GetLocalnet() *LocalnetConfig { return nil } +func (s *UserDefinedNetworkSpec) GetTransport() TransportOption { + // UDN (namespace-scoped) does not support EVPN transport + return "" +} + +func (s *UserDefinedNetworkSpec) GetEVPN() *EVPNConfig { + // UDN (namespace-scoped) does not support EVPN + return nil +} + func (s *NetworkSpec) GetTopology() NetworkTopology { return s.Topology } @@ -32,3 +42,11 @@ func (s *NetworkSpec) GetLayer2() *Layer2Config { func (s *NetworkSpec) GetLocalnet() *LocalnetConfig { return s.Localnet } + +func (s *NetworkSpec) GetTransport() TransportOption { + return s.Transport +} + +func (s *NetworkSpec) GetEVPN() *EVPNConfig { + return s.EVPN +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index da58521b1f..ff43fd8476 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -98,6 +98,8 @@ import ( userdefinednetworkscheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/scheme" userdefinednetworkapiinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions" userdefinednetworkinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions/userdefinednetwork/v1" + vtepinformerfactory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions" + vtepinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/informers/externalversions/vtep/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -129,6 +131,7 @@ type WatchFactory struct { raFactory routeadvertisementsinformerfactory.SharedInformerFactory frrFactory frrinformerfactory.SharedInformerFactory networkQoSFactory networkqosinformerfactory.SharedInformerFactory + vtepFactory vtepinformerfactory.SharedInformerFactory informers map[reflect.Type]*informer stopChan chan struct{} @@ -158,6 +161,7 @@ func (wf *WatchFactory) ShallowClone() *WatchFactory { raFactory: wf.raFactory, frrFactory: wf.frrFactory, networkQoSFactory: wf.networkQoSFactory, + vtepFactory: wf.vtepFactory, informers: wf.informers, stopChan: wf.stopChan, @@ -281,6 +285,13 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory } } + // Initialize VTEP factory for EVPN support in combined mode (cluster-manager + ovnkube-controller). 
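+	// Start() and Stop() below only touch wf.vtepFactory when it is non-nil, so
+	// skipping creation here is safe when the EVPN feature gate is off.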
+ if util.IsEVPNEnabled() { + wf.vtepFactory = vtepinformerfactory.NewSharedInformerFactory(ovnClientset.VTEPClient, resyncInterval) + // make sure shared informer is created for a factory, so on wf.vtepFactory.Start() it is initialized and caches are synced. + wf.vtepFactory.K8s().V1().VTEPs().Informer() + } + return wf, nil } @@ -646,6 +657,13 @@ func (wf *WatchFactory) Start() error { } } + if wf.vtepFactory != nil { + wf.vtepFactory.Start(wf.stopChan) + if err := waitForCacheSyncWithTimeout(wf.vtepFactory, wf.stopChan); err != nil { + return err + } + } + if wf.raFactory != nil { wf.raFactory.Start(wf.stopChan) if err := waitForCacheSyncWithTimeout(wf.raFactory, wf.stopChan); err != nil { @@ -706,6 +724,10 @@ func (wf *WatchFactory) Stop() { wf.cncFactory.Shutdown() } + if wf.vtepFactory != nil { + wf.vtepFactory.Shutdown() + } + if wf.raFactory != nil { wf.raFactory.Shutdown() } @@ -1081,6 +1103,13 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset wf.iFactory.Core().V1().Pods().Informer() } + // Initialize VTEP factory for EVPN support. + if util.IsEVPNEnabled() { + wf.vtepFactory = vtepinformerfactory.NewSharedInformerFactory(ovnClientset.VTEPClient, resyncInterval) + // make sure shared informer is created for a factory, so on wf.vtepFactory.Start() it is initialized and caches are synced. + wf.vtepFactory.K8s().V1().VTEPs().Informer() + } + if util.IsNetworkConnectEnabled() { wf.cncFactory = networkconnectinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkConnectClient, resyncInterval) wf.informers[ClusterNetworkConnectType], err = newQueuedInformer(eventQueueSize, @@ -1821,6 +1850,10 @@ func (wf *WatchFactory) ClusterNetworkConnectInformer() networkconnectinformer.C return wf.cncFactory.K8s().V1().ClusterNetworkConnects() } +func (wf *WatchFactory) VTEPInformer() vtepinformer.VTEPInformer { + return wf.vtepFactory.K8s().V1().VTEPs() +} + func (wf *WatchFactory) DNSNameResolverInformer() ocpnetworkinformerv1alpha1.DNSNameResolverInformer { return wf.dnsFactory.Network().V1alpha1().DNSNameResolvers() } diff --git a/go-controller/pkg/libovsdb/ops/chassis.go b/go-controller/pkg/libovsdb/ops/chassis.go index 83a2d6a3c2..4af8bc58b1 100644 --- a/go-controller/pkg/libovsdb/ops/chassis.go +++ b/go-controller/pkg/libovsdb/ops/chassis.go @@ -2,6 +2,9 @@ package ops import ( "context" + "fmt" + + "github.com/google/uuid" "k8s.io/apimachinery/pkg/util/sets" @@ -171,3 +174,19 @@ func CreateOrUpdateChassis(sbClient libovsdbclient.Client, chassis *sbdb.Chassis return nil } + +// validateRequestedChassisOption is a guard to ensure a caller is using the chassis-id (uuid format) +// for the requested chassis option. 
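+// For example (illustrative): an options[RequestedChassis] value such as
+// "0b4e6a2c-6d3f-4c6e-9e2a-1f2d3c4b5a69" parses as a UUID and passes, while a
+// chassis hostname such as "node-1" is rejected.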
+func validateRequestedChassisOption(options map[string]string) error { + if len(options) == 0 { + return nil + } + chassisID, ok := options[RequestedChassis] + if !ok || chassisID == "" { + return nil + } + if _, err := uuid.Parse(chassisID); err != nil { + return fmt.Errorf("requested-chassis must be a valid UUID, got %q", chassisID) + } + return nil +} diff --git a/go-controller/pkg/libovsdb/ops/router.go b/go-controller/pkg/libovsdb/ops/router.go index 8266bc34dd..3dc443bef3 100644 --- a/go-controller/pkg/libovsdb/ops/router.go +++ b/go-controller/pkg/libovsdb/ops/router.go @@ -187,6 +187,9 @@ func CreateOrUpdateLogicalRouterPort(nbClient libovsdbclient.Client, router *nbd // and returns the corresponding ops func CreateOrUpdateLogicalRouterPortOps(nbClient libovsdbclient.Client, ops []ovsdb.Operation, router *nbdb.LogicalRouter, lrp *nbdb.LogicalRouterPort, chassis *nbdb.GatewayChassis, fields ...interface{}) ([]ovsdb.Operation, error) { + if err := validateRequestedChassisOption(lrp.Options); err != nil { + return nil, err + } opModels := []operationModel{} if chassis != nil { opModels = append(opModels, operationModel{ diff --git a/go-controller/pkg/libovsdb/ops/switch.go b/go-controller/pkg/libovsdb/ops/switch.go index 4136f96bba..ff0216bd03 100644 --- a/go-controller/pkg/libovsdb/ops/switch.go +++ b/go-controller/pkg/libovsdb/ops/switch.go @@ -323,6 +323,9 @@ func createOrUpdateLogicalSwitchPortsOps(nbClient libovsdbclient.Client, ops []o opModels := make([]operationModel, 0, len(lsps)+1) for _, lsp := range lsps { + if err := validateRequestedChassisOption(lsp.Options); err != nil { + return nil, err + } opModel := createOrUpdateLogicalSwitchPortOpModelWithCustomFields(sw, lsp, createLSP, customFields) opModels = append(opModels, opModel) } @@ -480,38 +483,3 @@ func DeleteLogicalSwitchPortsWithPredicateOps(nbClient libovsdbclient.Client, op m := newModelClient(nbClient) return m.DeleteOps(ops, opModels...) 
} - -// UpdateLogicalSwitchPortSetOptions sets options on the provided logical switch -// port adding any missing, removing the ones set to an empty value and updating -// existing -func UpdateLogicalSwitchPortSetOptions(nbClient libovsdbclient.Client, lsp *nbdb.LogicalSwitchPort) error { - options := lsp.Options - lsp, err := GetLogicalSwitchPort(nbClient, lsp) - if err != nil { - return err - } - - if lsp.Options == nil { - lsp.Options = map[string]string{} - } - - for k, v := range options { - if v == "" { - delete(lsp.Options, k) - } else { - lsp.Options[k] = v - } - } - - opModel := operationModel{ - // For LSP's Name is a valid index, so no predicate is needed - Model: lsp, - OnModelUpdates: []interface{}{&lsp.Options}, - ErrNotFound: true, - BulkOp: false, - } - - m := newModelClient(nbClient) - _, err = m.CreateOrUpdate(opModel) - return err -} diff --git a/go-controller/pkg/networkmanager/nad_controller_test.go b/go-controller/pkg/networkmanager/nad_controller_test.go index 6062cb9eac..a794c07e01 100644 --- a/go-controller/pkg/networkmanager/nad_controller_test.go +++ b/go-controller/pkg/networkmanager/nad_controller_test.go @@ -178,12 +178,14 @@ func (tnc *testNetworkController) Start(context.Context) error { func (tnc *testNetworkController) Stop() { tnc.tcm.Lock() defer tnc.tcm.Unlock() + fmt.Printf("stopping network: %s\n", testNetworkKey(tnc)) tnc.tcm.stopped = append(tnc.tcm.stopped, testNetworkKey(tnc)) } func (tnc *testNetworkController) Cleanup() error { tnc.tcm.Lock() defer tnc.tcm.Unlock() + fmt.Printf("cleaning up network: %s\n", testNetworkKey(tnc)) tnc.tcm.cleaned = append(tnc.tcm.cleaned, testNetworkKey(tnc)) return nil } @@ -842,8 +844,20 @@ func TestNADController(t *testing.T) { g.Expect(err).ToNot(gomega.HaveOccurred()) netController := nadController.networkController - g.Expect(nadController.networkController.Start()).To(gomega.Succeed()) - defer nadController.networkController.Stop() + // Drive reconciliation only for networks touched by the NAD operation + // to avoid assertions against transient async queue states. 
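+			// syncTouchedNetworks (defined next) unions the network recorded for
+			// the NAD before the operation with the one recorded after it, and
+			// runs syncNetwork synchronously for each.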
+ syncTouchedNetworks := func(nadKey, prevNetwork string) { + networkNames := sets.New[string]() + if prevNetwork != "" { + networkNames.Insert(prevNetwork) + } + if currNetwork := nadController.nads[nadKey]; currNetwork != "" { + networkNames.Insert(currNetwork) + } + for _, network := range networkNames.UnsortedList() { + g.Expect(netController.syncNetwork(network)).To(gomega.Succeed()) + } + } for _, args := range tt.args { namespace, name, err := cache.SplitMetaNamespaceKey(args.nad) @@ -858,12 +872,14 @@ func TestNADController(t *testing.T) { g.Expect(err).To(gomega.Or(gomega.Not(gomega.HaveOccurred()), gomega.MatchError(apierrors.IsAlreadyExists, "AlreadyExists"))) } + prevNetwork := nadController.nads[args.nad] err = nadController.syncNAD(args.nad, nad) if args.wantErr { g.Expect(err).To(gomega.HaveOccurred()) } else { g.Expect(err).NotTo(gomega.HaveOccurred()) } + syncTouchedNetworks(args.nad, prevNetwork) } meetsExpectations := func(g gomega.Gomega) { @@ -943,8 +959,7 @@ func TestNADController(t *testing.T) { } } - g.Eventually(meetsExpectations).Should(gomega.Succeed()) - g.Consistently(meetsExpectations).Should(gomega.Succeed()) + meetsExpectations(g) }) } } diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 25c1968ec5..512aa6fb53 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -703,60 +703,6 @@ func getOVNSBZone() (string, error) { return dbZone, nil } -/** HACK BEGIN **/ -// TODO(tssurya): Remove this HACK a few months from now. -// checkOVNSBNodeLRSR returns true if the logical router static route for the -// the given nodeSubnet is present in the SBDB -func checkOVNSBNodeLRSR(nodeSubnet *net.IPNet) bool { - var matchv4, matchv6 string - v6 := true - v4 := true - if config.IPv6Mode && utilnet.IsIPv6CIDR(nodeSubnet) { - matchv6 = fmt.Sprintf("match=\"reg7 == 0 && ip6.dst == %s\"", nodeSubnet) - stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "logical_flow", matchv6) - klog.Infof("Upgrade Hack: checkOVNSBNodeLRSR for node - %s : match %s : stdout - %s : stderr - %s : err %v", - nodeSubnet, matchv6, stdout, stderr, err) - v6 = (err == nil && stderr == "" && stdout != "") - } - if config.IPv4Mode && !utilnet.IsIPv6CIDR(nodeSubnet) { - matchv4 = fmt.Sprintf("match=\"reg7 == 0 && ip4.dst == %s\"", nodeSubnet) - stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "logical_flow", matchv4) - klog.Infof("Upgrade Hack: checkOVNSBNodeLRSR for node - %s : match %s : stdout - %s : stderr - %s : err %v", - nodeSubnet, matchv4, stdout, stderr, err) - v4 = (err == nil && stderr == "" && stdout != "") - } - return v6 && v4 -} - -func fetchLBNames() string { - stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "name", "find", "Load_Balancer") - if err != nil || stderr != "" { - klog.Errorf("Upgrade hack: fetchLBNames could not fetch services %v/%v", err, stderr) - return stdout // will be empty and we will retry - } - klog.Infof("Upgrade Hack: fetchLBNames: stdout - %s : stderr - %s : err %v", stdout, stderr, err) - return stdout -} - -// lbExists returns true if the OVN load balancer for the corresponding namespace/name -// was created -func lbExists(lbNames, namespace, name string) bool { - stitchedServiceName := "Service_" + namespace + "/" + name - match := strings.Contains(lbNames, stitchedServiceName) - klog.Infof("Upgrade Hack: lbExists for 
service - %s/%s/%s : match - %v", - namespace, name, stitchedServiceName, match) - return match -} - -func portExists(namespace, name string) bool { - lspName := fmt.Sprintf("logical_port=%s", util.GetLogicalPortName(namespace, name)) - stdout, stderr, err := util.RunOVNSbctl("--bare", "--columns", "_uuid", "find", "Port_Binding", lspName) - klog.Infof("Upgrade Hack: portExists for pod - %s/%s : stdout - %s : stderr - %s", namespace, name, stdout, stderr) - return err == nil && stderr == "" && stdout != "" -} - -/** HACK END **/ - // Init executes the first steps to start the DefaultNodeNetworkController. // It is split from Start() and executed before UserDefinedNodeNetworkController (UDNNC) // to allow UDNNC to reference the openflow manager created in Init. @@ -820,12 +766,9 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { return fmt.Errorf("timed out waiting for the node zone %s to match the OVN Southbound db zone, err: %v, err1: %v", config.Default.Zone, err, err1) } - // if its nonIC OR IC=true and if its phase1 OR if its IC to IC upgrades - if !config.OVNKubernetesFeature.EnableInterconnect || sbZone == types.OvnDefaultZone || util.HasNodeMigratedZone(node) { // if its nonIC or if its phase1 - for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} { - if err := auth.SetDBAuth(); err != nil { - return err - } + for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} { + if err := auth.SetDBAuth(); err != nil { + return err } } @@ -956,17 +899,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { klog.Infof("Starting the default node network controller") var err error - var node *corev1.Node if nc.mgmtPortController == nil { return fmt.Errorf("default node network controller hasn't been pre-started") } - if node, err = nc.watchFactory.GetNode(nc.name); err != nil { - return fmt.Errorf("error retrieving node %s: %v", nc.name, err) - } - - nodeAnnotator := kube.NewNodeAnnotator(nc.Kube, node.Name) waiter := newStartupWaiter() // Complete gateway initialization @@ -994,125 +931,6 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { } } - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. - // STEP1: ovnkube-node start's up in remote zone and sets the "k8s.ovn.org/zone-name" above. - // STEP2: We delay the flip of connection for ovnkube-node(ovn-controller) to the new remote SBDB - // until the new remote ovnkube-controller has finished programming all the K8s core objects - // like routes, services and pods. Until then the ovnkube-node will talk to legacy SBDB. 
- // STEP3: Once we get the signal that the new SBDB is ready, we set the "k8s.ovn.org/remote-zone-migrated" annotation - // STEP4: We call setDBAuth to now point to new SBDB - // STEP5: Legacy ovnkube-master sees "k8s.ovn.org/remote-zone-migrated" annotation on this node and now knows that - // this node has remote-zone-migrated successfully and tears down old setup and creates new IC resource - // plumbing (takes 80ms based on what we saw in CI runs so we might still have that small window of disruption). - // NOTE: ovnkube-node in DPU host mode doesn't go through upgrades for OVN-IC and has no SBDB to connect to. Thus this part shall be skipped. - var syncNodes, syncServices, syncPods bool - if config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.OVNKubernetesFeature.EnableInterconnect && nc.sbZone != types.OvnDefaultZone && !util.HasNodeMigratedZone(node) { - klog.Info("Upgrade Hack: Interconnect is enabled") - var err1 error - start := time.Now() - err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 300*time.Second, true, func(_ context.Context) (bool, error) { - // we loop through all the nodes in the cluster and ensure ovnkube-controller has finished creating the LRSR required for pod2pod overlay communication - if !syncNodes { - nodes, err := nc.watchFactory.GetNodes() - if err != nil { - err1 = fmt.Errorf("upgrade hack: error retrieving node %s: %v", nc.name, err) - return false, nil - } - for _, node := range nodes { - node := *node - if nc.name != node.Name && util.GetNodeZone(&node) != config.Default.Zone && !util.NoHostSubnet(&node) { - nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(&node, types.DefaultNetworkName) - if err != nil { - if util.IsAnnotationNotSetError(err) { - klog.Infof("Skipping node %q. k8s.ovn.org/node-subnets annotation was not found", node.Name) - continue - } - err1 = fmt.Errorf("unable to fetch node-subnet annotation for node %s: err, %v", node.Name, err) - return false, nil - } - for _, nodeSubnet := range nodeSubnets { - klog.Infof("Upgrade Hack: node %s, subnet %s", node.Name, nodeSubnet) - if !checkOVNSBNodeLRSR(nodeSubnet) { - err1 = fmt.Errorf("upgrade hack: unable to find LRSR for node %s", node.Name) - return false, nil - } - } - } - } - klog.Infof("Upgrade Hack: Syncing nodes took %v", time.Since(start)) - syncNodes = true - } - // we loop through all existing services in the cluster and ensure ovnkube-controller has finished creating LoadBalancers required for services to work - if !syncServices { - services, err := nc.watchFactory.GetServices() - if err != nil { - err1 = fmt.Errorf("upgrade hack: error retrieving the services %v", err) - return false, nil - } - lbNames := fetchLBNames() - for _, s := range services { - // don't process headless service - if !util.ServiceTypeHasClusterIP(s) || !util.IsClusterIPSet(s) { - continue - } - if !lbExists(lbNames, s.Namespace, s.Name) { - return false, nil - } - } - klog.Infof("Upgrade Hack: Syncing services took %v", time.Since(start)) - syncServices = true - } - if !syncPods { - pods, err := nc.watchFactory.GetAllPods() - if err != nil { - err1 = fmt.Errorf("upgrade hack: error retrieving the services %v", err) - return false, nil - } - for _, p := range pods { - if !util.PodScheduled(p) || util.PodCompleted(p) || util.PodWantsHostNetwork(p) { - continue - } - if p.Spec.NodeName != nc.name { - // remote pod - continue - } - if !portExists(p.Namespace, p.Name) { - return false, nil - } - } - klog.Infof("Upgrade Hack: Syncing pods took %v", time.Since(start)) - syncPods = true 
-				}
-				return true, nil
-			})
-			if err != nil {
-				return fmt.Errorf("upgrade hack: failed while waiting for the remote ovnkube-controller to be ready: %v, %v", err, err1)
-			}
-			if err := util.SetNodeZoneMigrated(nodeAnnotator, nc.sbZone); err != nil {
-				return fmt.Errorf("upgrade hack: failed to set node zone annotation for node %s: %w", nc.name, err)
-			}
-			if err := nodeAnnotator.Run(); err != nil {
-				return fmt.Errorf("upgrade hack: failed to set node %s annotations: %w", nc.name, err)
-			}
-			klog.Infof("ovnkube-node %s finished annotating node with remote-zone-migrated; took: %v", nc.name, time.Since(start))
-			for _, auth := range []config.OvnAuthConfig{config.OvnNorth, config.OvnSouth} {
-				if err := auth.SetDBAuth(); err != nil {
-					return fmt.Errorf("upgrade hack: Unable to set the authentication towards OVN local dbs")
-				}
-			}
-			klog.Infof("Upgrade hack: ovnkube-node %s finished setting DB Auth; took: %v", nc.name, time.Since(start))
-		}
-		/** HACK END **/
-
	// Wait for management port and gateway resources to be created by the master
	klog.Infof("Waiting for gateway and management port readiness...")
	start := time.Now()
@@ -1155,7 +973,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
			defer nc.wg.Done()
			nodeController.Run(nc.stopChan)
		}()
-	} else {
+	} else if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
		// attempt to cleanup the possibly stale bridge
		_, stderr, err := util.RunOVSVsctl("--if-exists", "del-br", "br-ext")
		if err != nil {
@@ -1366,12 +1184,23 @@ func (nc *DefaultNodeNetworkController) reconcileConntrackUponEndpointSliceEvent
				klog.Errorf("Failed to get service port for endpoint %s: %v", oldIPStr, err)
				continue
			}
-			// upon update and delete events, flush conntrack only for UDP
+			// upon update and delete events, flush UDP conntrack for the Service port
			if _, err := util.DeleteConntrackServicePort(oldIPStr, servicePort.Port, *oldPort.Protocol, netlink.ConntrackReplyAnyIP, nil); err != nil {
				klog.Errorf("Failed to delete conntrack entry for %s port %d: %v", oldIPStr, servicePort.Port, err)
				errors = append(errors, err)
			}
+
+			// Flush UDP conntrack entries for the NodePort (and for LoadBalancer services that allocate NodePorts).
+			// TODO: Once vishvananda/netlink supports ConntrackFilterType '--reply-port-src', we can flush the
+			// conntrack entries for both ClusterIP and NodePort with a single DeleteConntrackServicePort() call.
+			if util.ServiceTypeHasNodePort(svc) && servicePort.NodePort > 0 {
+				if _, err := util.DeleteConntrackServicePort(oldIPStr, servicePort.NodePort, *oldPort.Protocol,
+					netlink.ConntrackReplyAnyIP, nil); err != nil {
+					klog.Errorf("Failed to delete conntrack entry for %s NodePort %d: %v", oldIPStr, servicePort.NodePort, err)
+					errors = append(errors, err)
+				}
+			}
		}
	}
}
@@ -1595,9 +1424,23 @@ func (nc *DefaultNodeNetworkController) syncNodes(objs []interface{}) error {
}

// validateVTEPInterfaceMTU checks if the MTU of the interface that has ovn-encap-ip is big
-// enough to carry the `config.Default.MTU` and the Geneve header. If the MTU is not big
-// enough, it will return an error
+// enough to carry the `config.Default.MTU` and the Geneve header (if overlay transport is used).
+// If the MTU is not big enough, it will return an error func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { + // calc required MTU + var requiredMTU int + if config.Gateway.SingleNode || config.Default.Transport == types.NetworkTransportNoOverlay { + requiredMTU = config.Default.MTU + } else { + if config.IPv4Mode && !config.IPv6Mode { + // we run in single-stack IPv4 only + requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv4 + } else { + // we run in single-stack IPv6 or dual-stack mode + requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv6 + } + } + // OVN allows `external_ids:ovn-encap-ip` to be a list of IPs separated by comma ovnEncapIps := strings.Split(config.Default.EffectiveEncapIP, ",") for _, ip := range ovnEncapIps { @@ -1610,20 +1453,6 @@ func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error { return fmt.Errorf("could not get MTU for the interface with address %s: %w", ovnEncapIP, err) } - // calc required MTU - var requiredMTU int - if config.Gateway.SingleNode { - requiredMTU = config.Default.MTU - } else { - if config.IPv4Mode && !config.IPv6Mode { - // we run in single-stack IPv4 only - requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv4 - } else { - // we run in single-stack IPv6 or dual-stack mode - requiredMTU = config.Default.MTU + types.GeneveHeaderLengthIPv6 - } - } - if mtu < requiredMTU { return fmt.Errorf("MTU (%d) of network interface %s is too small for specified overlay MTU (%d)", mtu, interfaceName, requiredMTU) diff --git a/go-controller/pkg/node/default_node_network_controller_test.go b/go-controller/pkg/node/default_node_network_controller_test.go index ccfa18af0e..76891520c8 100644 --- a/go-controller/pkg/node/default_node_network_controller_test.go +++ b/go-controller/pkg/node/default_node_network_controller_test.go @@ -1769,7 +1769,6 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } ip string port uint16 protocol uint8 - family netlink.InetFamily } // Test data structure for table-driven tests @@ -1782,12 +1781,21 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } expectedFilters []expectedConntrackFilter } - // Helper to create EndpointSlice - makeEndpointSlice := func(portConfigs []struct { + type endpointPortConfig struct { name *string port int32 protocol corev1.Protocol - }, addresses []string) *discovery.EndpointSlice { + } + + type servicePortConfig struct { + name string + port int32 + targetPort int32 + protocol corev1.Protocol + } + + // Helper to create EndpointSlice + makeEndpointSlice := func(portConfigs []endpointPortConfig, addresses []string) *discovery.EndpointSlice { ports := make([]discovery.EndpointPort, len(portConfigs)) for i, pc := range portConfigs { p := pc.port @@ -1815,12 +1823,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } } // Helper to create Service - makeService := func(portConfigs []struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }) *corev1.Service { + makeService := func(portConfigs []servicePortConfig) *corev1.Service { ports := make([]corev1.ServicePort, len(portConfigs)) for i, pc := range portConfigs { ports[i] = corev1.ServicePort{ @@ -1842,6 +1845,16 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } } } + // Helper to create NodePort or LoadBalancer Service by invoking makeService + makeServiceWithNodePort := func(portConfigs []servicePortConfig, nodePorts []int32, svcType corev1.ServiceType) *corev1.Service { + 
svc := makeService(portConfigs) + svc.Spec.Type = svcType + for i := 0; i < len(nodePorts) && i < len(svc.Spec.Ports); i++ { + svc.Spec.Ports[i].NodePort = nodePorts[i] + } + return svc + } + // Helper function to build expected ConntrackFilter for verification buildExpectedFilter := func(ef expectedConntrackFilter) *netlink.ConntrackFilter { filter := &netlink.ConntrackFilter{} @@ -1942,13 +1955,8 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("old endpointslice is nil", reconcileConntrackTestCase{ - desc: "should not delete any conntrack entries when old endpoint is nil", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should not delete any conntrack entries when old endpoint is nil", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: nil, newEndpointSlice: &discovery.EndpointSlice{}, expectedConntrackCalls: 0, @@ -1957,69 +1965,42 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("service exists with matching unnamed port", reconcileConntrackTestCase{ - desc: "should delete conntrack with service port for unnamed port", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack with service port for unnamed port", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 1, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("service exists with matching named port", reconcileConntrackTestCase{ - desc: "should delete conntrack with service port for named port", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack with service port for named port", + service: makeService([]servicePortConfig{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 1, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("service exists but port name mismatch", 
reconcileConntrackTestCase{ - desc: "should skip conntrack deletion when port name doesn't match", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should skip conntrack deletion when port name doesn't match", + service: makeService([]servicePortConfig{{name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: strPtr("grpc"), port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: strPtr("grpc"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2032,11 +2013,7 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } desc: "should return early without deleting conntrack when service not found", service: nil, oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2046,19 +2023,10 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("TCP protocol should be skipped", reconcileConntrackTestCase{ - desc: "should skip conntrack deletion for TCP protocol", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: tcpProtocol}}), + desc: "should skip conntrack deletion for TCP protocol", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: tcpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: tcpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: tcpProtocol}}, []string{"10.0.0.1"}, ), newEndpointSlice: nil, @@ -2068,78 +2036,51 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("multiple endpoints", reconcileConntrackTestCase{ - desc: "should delete conntrack for each endpoint", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack for each endpoint", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"}, ), newEndpointSlice: nil, expectedConntrackCalls: 3, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, - {ip: "10.0.0.2", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, - {ip: "10.0.0.3", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: 
netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.0.0.2", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.0.0.3", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("IPv6 endpoint", reconcileConntrackTestCase{ - desc: "should delete conntrack for IPv6 endpoint", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack for IPv6 endpoint", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"fd00::1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 1, expectedFilters: []expectedConntrackFilter{ - {ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V6}, + {ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), Entry("dual-stack endpoints", reconcileConntrackTestCase{ - desc: "should delete conntrack for both IPv4 and IPv6", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), + desc: "should delete conntrack for both IPv4 and IPv6", + service: makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, + []endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.0.0.1", "fd00::1"}, ), newEndpointSlice: nil, expectedConntrackCalls: 2, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, - {ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V6}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "fd00::1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, }, }, ), @@ -2147,21 +2088,12 @@ add element inet ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } Entry("multiple service ports with matching names", reconcileConntrackTestCase{ desc: "should match correct service port by name for multiple ports", - service: makeService([]struct { - name string - port int32 - targetPort int32 - protocol corev1.Protocol - }{ + service: makeService([]servicePortConfig{ {name: "http", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}, {name: "https", port: testServicePort2, targetPort: testEndpointPort2, protocol: udpProtocol}, }), oldEndpointSlice: makeEndpointSlice( - []struct { - name *string - port int32 - protocol corev1.Protocol - }{ + []endpointPortConfig{ {name: strPtr("http"), port: testEndpointPort1, protocol: udpProtocol}, {name: strPtr("https"), port: testEndpointPort2, protocol: udpProtocol}, }, @@ -2170,11 +2102,94 @@ add element inet 
ovn-kubernetes remote-node-ips-v6 { 2002:db8:1::4 } newEndpointSlice: nil, expectedConntrackCalls: 2, expectedFilters: []expectedConntrackFilter{ - {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, - {ip: "10.0.0.1", port: uint16(testServicePort2), protocol: syscall.IPPROTO_UDP, family: netlink.FAMILY_V4}, + {ip: "10.0.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.0.0.1", port: uint16(testServicePort2), protocol: syscall.IPPROTO_UDP}, }, }, ), + Entry("NodePort service", reconcileConntrackTestCase{ + desc: "should delete conntrack entries for both service port and NodePort", + service: makeServiceWithNodePort([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}, + []int32{30000}, corev1.ServiceTypeNodePort), + oldEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}), + newEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}), + expectedConntrackCalls: 2, + expectedFilters: []expectedConntrackFilter{ + {ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP}, + }, + }), + Entry("NodePort service with mixed protocols should only clean UDP NodePort", reconcileConntrackTestCase{ + desc: "should only delete conntrack for UDP NodePort, not TCP (protocol filtering)", + service: makeServiceWithNodePort([]servicePortConfig{ + {name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}, + {name: "", port: testServicePort2, targetPort: testEndpointPort1, protocol: tcpProtocol}, + }, []int32{30000, 30001}, corev1.ServiceTypeNodePort), + oldEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}), + newEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}), + expectedConntrackCalls: 2, // Only UDP: service port + NodePort (TCP port 30001 should be skipped) + expectedFilters: []expectedConntrackFilter{ + {ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP}, + }, + }), + Entry("NodePort service with multiple UDP ports", reconcileConntrackTestCase{ + desc: "should delete conntrack entries only for the specific NodePort that changed", + service: makeServiceWithNodePort([]servicePortConfig{ + {name: "dns", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}, + {name: "snmp", port: testServicePort2, targetPort: testEndpointPort1, protocol: udpProtocol}, + }, []int32{30000, 30002}, corev1.ServiceTypeNodePort), + oldEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: strPtr("dns"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}), + newEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: strPtr("dns"), port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}), + expectedConntrackCalls: 2, // service port + NodePort for "dns" only + expectedFilters: []expectedConntrackFilter{ + {ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP}, + }, + }), + 
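Aside: the NodePort entries above all assert the same pattern, one conntrack flush keyed on the service port plus a second flush keyed on the NodePort. A minimal sketch of that filter shape built with vishvananda/netlink directly; flushUDPEntries is a hypothetical stand-in for what DeleteConntrackServicePort is assumed to construct internally, not code from this patch:

package main

import (
	"net"
	"syscall"

	"github.com/vishvananda/netlink"
)

// flushUDPEntries deletes UDP conntrack entries whose reply tuple involves
// endpointIP and whose original destination port matches port. The caller
// invokes it once with the service port and once more with the NodePort.
func flushUDPEntries(endpointIP net.IP, port uint16) (uint, error) {
	filter := &netlink.ConntrackFilter{}
	if err := filter.AddProtocol(syscall.IPPROTO_UDP); err != nil {
		return 0, err
	}
	if err := filter.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
		return 0, err
	}
	// ConntrackReplyAnyIP matches endpointIP against either side of the
	// reply tuple, mirroring the DeleteConntrackServicePort call sites above.
	if err := filter.AddIP(netlink.ConntrackReplyAnyIP, endpointIP); err != nil {
		return 0, err
	}
	family := netlink.InetFamily(netlink.FAMILY_V4)
	if endpointIP.To4() == nil {
		family = netlink.InetFamily(netlink.FAMILY_V6)
	}
	return netlink.ConntrackDeleteFilters(netlink.ConntrackTable, family, filter)
}

The mixed-protocol entry then falls out naturally: the TCP NodePort 30001 never reaches the flush because only UDP ports are processed in the first place.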
Entry("LoadBalancer service with NodePort allocation", reconcileConntrackTestCase{ + desc: "should delete conntrack entries for both service port and NodePort", + service: func() *corev1.Service { + svc := makeServiceWithNodePort([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}, + []int32{30000}, corev1.ServiceTypeLoadBalancer) + svc.Status = corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{{IP: "5.5.5.5"}}, + }, + } + return svc + }(), + oldEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}), + newEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}), + expectedConntrackCalls: 2, + expectedFilters: []expectedConntrackFilter{ + {ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + {ip: "10.128.0.1", port: 30000, protocol: syscall.IPPROTO_UDP}, + }, + }), + Entry("LoadBalancer service with AllocateLoadBalancerNodePorts=false", func() reconcileConntrackTestCase { + allocateNodePorts := false + return reconcileConntrackTestCase{ + desc: "should only delete conntrack entries for service port (no NodePort)", + service: func() *corev1.Service { + svc := makeService([]servicePortConfig{{name: "", port: testServicePort1, targetPort: testEndpointPort1, protocol: udpProtocol}}) + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + svc.Spec.AllocateLoadBalancerNodePorts = &allocateNodePorts + svc.Status = corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{{IP: "5.5.5.5"}}, + }, + } + return svc + }(), + oldEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.1"}), + newEndpointSlice: makeEndpointSlice([]endpointPortConfig{{name: nil, port: testEndpointPort1, protocol: udpProtocol}}, []string{"10.128.0.2"}), + expectedConntrackCalls: 1, + expectedFilters: []expectedConntrackFilter{ + {ip: "10.128.0.1", port: uint16(testServicePort1), protocol: syscall.IPPROTO_UDP}, + }, + } + }()), ) }) }) diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index 6625b04e8c..f0eb9094d6 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -75,7 +75,7 @@ func getGatewayNextHops() ([]net.IP, string, error) { } } gatewayIntf := config.Gateway.Interface - if gatewayIntf != "" { + if gatewayIntf != "" && config.OvnKubeNode.Mode != types.NodeModeDPUHost { if bridgeName, _, err := util.RunOVSVsctl("port-to-br", gatewayIntf); err == nil { // This is an OVS bridge's internal port gatewayIntf = bridgeName diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 89b858d09c..475bd328a8 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -204,7 +204,7 @@ func newEndpointSlice(svcName, namespace string, endpoints []discovery.Endpoint, } } -func makeConntrackFilter(ip string, port int, protocol corev1.Protocol) *netlink.ConntrackFilter { +func makeConntrackFilter(ip string, port int, protocol corev1.Protocol, filterType netlink.ConntrackFilterType) *netlink.ConntrackFilter { filter := &netlink.ConntrackFilter{} var err error @@ -223,15 +223,17 @@ func 
makeConntrackFilter(ip string, port int, protocol corev1.Protocol) *netlink } ipAddress := net.ParseIP(ip) Expect(ipAddress).NotTo(BeNil()) - err = filter.AddIP(netlink.ConntrackOrigDstIP, ipAddress) + err = filter.AddIP(filterType, ipAddress) Expect(err).NotTo(HaveOccurred()) return filter } type ctFilterDesc struct { - ip string - port int + ip string + port int + protocol corev1.Protocol + filterType netlink.ConntrackFilterType } func addConntrackMocks(nlMock *mocks.NetLinkOps, filterDescs []ctFilterDesc) { @@ -242,7 +244,7 @@ func addConntrackMocks(nlMock *mocks.NetLinkOps, filterDescs []ctFilterDesc) { OnCallMethodArgs: []interface{}{ netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(ctf.ip, ctf.port, corev1.ProtocolTCP), + makeConntrackFilter(ctf.ip, ctf.port, ctf.protocol, ctf.filterType), }, RetArgList: []interface{}{uint(1), nil}, }) @@ -1789,7 +1791,7 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = wf Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"1.1.1.1", 8032}, {"10.129.0.2", 8032}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"1.1.1.1", 8032, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"10.129.0.2", 8032, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) Eventually(func() bool { @@ -1878,7 +1880,7 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = wf Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 0}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 0, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) Eventually(fExec.CalledMatchesExpected, "2s").Should(BeTrue(), fExec.ErrorDesc) @@ -1981,7 +1983,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -1991,7 +1993,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP)). + makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2057,7 +2059,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). 
+ makeConntrackFilter(service.Spec.ClusterIP, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2067,7 +2069,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP)). + makeConntrackFilter("192.168.18.15", int(nodePort), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2182,7 +2184,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(externalIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(externalIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2253,7 +2255,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(lbIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP)). + makeConntrackFilter(lbIP1, int(service.Spec.Ports[0].Port), corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2322,7 +2324,7 @@ var _ = Describe("Node Operations", func() { On("ConntrackDeleteFilters", netlink.ConntrackTableType(netlink.ConntrackTable), netlink.InetFamily(netlink.FAMILY_V4), - makeConntrackFilter(service.Spec.ClusterIP, 80, corev1.ProtocolUDP)). + makeConntrackFilter(service.Spec.ClusterIP, 80, corev1.ProtocolUDP, netlink.ConntrackOrigDstIP)). Return(uint(1), nil). 
Run(func(_ mock.Arguments) { conntrackDeleteFiltersCount.Add(1) @@ -2427,7 +2429,7 @@ var _ = Describe("Node Operations", func() { return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) }).Should(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.10.10.1", 8034}, {"10.129.0.2", 8034}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.10.10.1", 8034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"10.129.0.2", 8034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -2555,11 +2557,11 @@ var _ = Describe("Node Operations", func() { }).Should(Equal(expectedLBExternalIPFlows2)) addConntrackMocks(netlinkMock, []ctFilterDesc{ - {"1.1.1.1", 8080}, - {"1.1.1.2", 8080}, - {"5.5.5.5", 8080}, - {"192.168.18.15", 31111}, - {"10.129.0.2", 8080}, + {"1.1.1.1", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"1.1.1.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"5.5.5.5", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, + {"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, }) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( @@ -2774,7 +2776,7 @@ var _ = Describe("Node Operations", func() { return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) }).Should(Succeed()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 38034}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 38034, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -2911,7 +2913,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3057,7 +3059,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3207,7 +3209,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, 
{"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3352,7 +3354,7 @@ var _ = Describe("Node Operations", func() { flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) @@ -3500,7 +3502,7 @@ var _ = Describe("Node Operations", func() { Expect(fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111")).To(Equal(expectedFlows)) - addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) + addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}, {"192.168.18.15", 31111, corev1.ProtocolTCP, netlink.ConntrackOrigDstIP}}) Expect(fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) diff --git a/go-controller/pkg/node/udn_isolation.go b/go-controller/pkg/node/udn_isolation.go index 6a24afd89d..7c41105948 100644 --- a/go-controller/pkg/node/udn_isolation.go +++ b/go-controller/pkg/node/udn_isolation.go @@ -357,7 +357,11 @@ func (m *UDNHostIsolationManager) runKubeletRestartTracker(ctx context.Context) klog.Errorf("Error closing dbus connection for UDN isolation: %v", err) } return - case signal := <-signalChan: + case signal, ok := <-signalChan: + if !ok || signal == nil { + // Channel was closed, connection is shutting down + return + } klog.V(5).Infof("D-Bus event received: %#v", signal) // Extract unit name from path unitPath := signal.Path diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index f1e14e574c..f67f3b3972 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -634,7 +634,7 @@ func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostS } err := libovsdbops.CreateOrUpdateLogicalSwitch(bnc.nbClient, &logicalSwitch, &logicalSwitch.OtherConfig, - &logicalSwitch.LoadBalancerGroup) + &logicalSwitch.LoadBalancerGroup, &logicalSwitch.ExternalIDs) if err != nil { return fmt.Errorf("failed to add logical switch %+v: %v", logicalSwitch, err) } @@ -1035,20 +1035,6 @@ func (bnc *BaseNetworkController) GetLocalZoneNodes() ([]*corev1.Node, error) { // isLocalZoneNode returns true if the node is part of the local zone. func (bnc *BaseNetworkController) isLocalZoneNode(node *corev1.Node) bool { - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. 
This is so that network disruption for the existing workloads
-	// is negligible and until the point where ovnkube-node flips the switch to connect
-	// to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure
-	// OVN/OVS flows are intact.
-	if bnc.zone == types.OvnDefaultZone {
-		return !util.HasNodeMigratedZone(node)
-	}
-	/** HACK END **/
	return util.GetNodeZone(node) == bnc.zone
}
diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go
index a83097e35f..6903541f73 100644
--- a/go-controller/pkg/ovn/base_network_controller_pods.go
+++ b/go-controller/pkg/ovn/base_network_controller_pods.go
@@ -538,15 +538,6 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *corev1.Pod, nadKe
	if !lspExist || len(existingLSP.Options["iface-id-ver"]) != 0 {
		lsp.Options["iface-id-ver"] = string(pod.UID)
	}
-	// Bind the port to the node's chassis; prevents ping-ponging between
-	// chassis if ovnkube-node isn't running correctly and hasn't cleared
-	// out iface-id for an old instance of this pod, and the pod got
-	// rescheduled.
-
-	if !config.Kubernetes.DisableRequestedChassis {
-		lsp.Options[libovsdbops.RequestedChassis] = pod.Spec.NodeName
-	}
-
	// let's calculate if this network controller's role for this pod
	// and pass that information while determining the podAnnotations
	networkRole, err := bnc.GetNetworkRole(pod)
@@ -559,6 +550,28 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *corev1.Pod, nadKe
		return nil, nil, nil, false, nil
	}

+	// Bind the port to the node's chassis.
+	// For IC this is required for Layer 2 networks with remote ports.
+	// For legacy mode with a centralized OVN database, it prevents ping-ponging
+	// between chassis if ovnkube-node isn't running correctly and hasn't cleared
+	// out iface-id for an old instance of this pod, and the pod got
+	// rescheduled.
+	var node *corev1.Node
+	if !config.Kubernetes.DisableRequestedChassis {
+		node, err = bnc.watchFactory.GetNode(pod.Spec.NodeName)
+		if err != nil {
+			return nil, nil, nil, false, err
+		}
+		chassisID, err := util.ParseNodeChassisIDAnnotation(node)
+		if err != nil {
+			if util.IsAnnotationNotSetError(err) {
+				return nil, nil, nil, false, ovntypes.NewSuppressedError(err)
+			}
+			return nil, nil, nil, false, err
+		}
+		lsp.Options[libovsdbops.RequestedChassis] = chassisID
+	}
+
	// Although we have different code to allocate the pod annotation for the
	// default network and user-defined networks, at the time of this writing they
	// are functionally equivalent and the only reason to keep them separated is
@@ -800,7 +813,8 @@ func calculateStaticMAC(podDesc string, mac string) (net.HardwareAddr, error) {
}

// allocatePodAnnotation and update the corresponding pod annotation.
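Note on the requested-chassis change in the hunk above: the option value is now the chassis ID parsed from the node's annotation rather than pod.Spec.NodeName, which keeps remote port binding correct even when the chassis name and the Kubernetes node name diverge. A minimal sketch of the parsing step, assuming the usual k8s.ovn.org/node-chassis-id annotation key; parseNodeChassisID is illustrative only, the patch itself calls util.ParseNodeChassisIDAnnotation:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// ovnNodeChassisID is the annotation ovnkube-node is assumed to write with
// the chassis ID of the node's local ovn-controller.
const ovnNodeChassisID = "k8s.ovn.org/node-chassis-id"

// parseNodeChassisID returns the chassis ID recorded on the node, or an
// error when the annotation has not been set yet (e.g. ovnkube-node has
// not finished initializing on that node).
func parseNodeChassisID(node *corev1.Node) (string, error) {
	chassisID, ok := node.Annotations[ovnNodeChassisID]
	if !ok || chassisID == "" {
		return "", fmt.Errorf("%s annotation not found for node %s", ovnNodeChassisID, node.Name)
	}
	return chassisID, nil
}

The annotation-not-set case is deliberately wrapped in a suppressed error above, so pods scheduled to nodes that have not finished initializing are retried rather than failed hard.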
-func (bnc *BaseNetworkController) allocatePodAnnotation(pod *corev1.Pod, existingLSP *nbdb.LogicalSwitchPort, podDesc, nadKey string, network *nadapi.NetworkSelectionElement, networkRole string) (*util.PodAnnotation, bool, error) { +func (bnc *BaseNetworkController) allocatePodAnnotation(pod *corev1.Pod, existingLSP *nbdb.LogicalSwitchPort, podDesc, + nadKey string, network *nadapi.NetworkSelectionElement, networkRole string) (*util.PodAnnotation, bool, error) { var releaseIPs bool var podMac net.HardwareAddr var podIfAddrs []*net.IPNet diff --git a/go-controller/pkg/ovn/controller/admin_network_policy/status.go b/go-controller/pkg/ovn/controller/admin_network_policy/status.go index 5ffb2fcc2d..828f159370 100644 --- a/go-controller/pkg/ovn/controller/admin_network_policy/status.go +++ b/go-controller/pkg/ovn/controller/admin_network_policy/status.go @@ -46,6 +46,22 @@ const ( policyNotReadyReason = "SetupFailed" ) +// doesStatusNeedAnUpdate compares the existing condition with the new condition +// and returns true if an update is needed, false if the status is already in the desired state. +// This helps avoid unnecessary API server calls when the status hasn't changed. +func doesStatusNeedAnUpdate(existingCondition *metav1.Condition, newCondition metav1.Condition) bool { + if existingCondition == nil { + return true // condition doesn't exist yet, needs to be created + } + // Check if Status, Reason, and Message are all the same - if so, no update needed + if existingCondition.Status == newCondition.Status && + existingCondition.Reason == newCondition.Reason && + existingCondition.Message == newCondition.Message { + return false + } + return true +} + // updateANPStatusToReady updates the status of the policy to reflect that it is ready // Each zone's ovnkube-controller will call this, hence let's update status using server-side-apply func (c *Controller) updateANPStatusToReady(anpName string) error { @@ -59,8 +75,6 @@ func (c *Controller) updateANPStatusToReady(anpName string) error { if err != nil { return fmt.Errorf("unable to update the status of ANP %s, err: %v", anpName, err) } - klog.V(5).Infof("Patched the status of ANP %v with condition type %v/%v", - anpName, policyReadyStatusType+c.zone, metav1.ConditionTrue) return nil } @@ -83,8 +97,6 @@ func (c *Controller) updateANPStatusToNotReady(anpName, message string) error { if err != nil { return fmt.Errorf("unable update the status of ANP %s, err: %v", anpName, err) } - klog.V(3).Infof("Patched the status of ANP %v with condition type %v/%v and reason %s/%s", - anpName, policyReadyStatusType+c.zone, metav1.ConditionFalse, policyNotReadyReason, message) return nil } @@ -94,6 +106,10 @@ func (c *Controller) updateANPZoneStatusCondition(newCondition metav1.Condition, return err } existingCondition := meta.FindStatusCondition(anp.Status.Conditions, newCondition.Type) + if !doesStatusNeedAnUpdate(existingCondition, newCondition) { + // status is already in the desired state, skip the update to reduce API server load + return nil + } if existingCondition == nil { newCondition.LastTransitionTime = metav1.NewTime(time.Now()) } else { @@ -109,6 +125,10 @@ func (c *Controller) updateANPZoneStatusCondition(newCondition metav1.Condition, WithStatus(anpapiapply.AdminNetworkPolicyStatus().WithConditions(newCondition)) _, err = c.anpClientSet.PolicyV1alpha1().AdminNetworkPolicies(). 
ApplyStatus(context.TODO(), applyObj, metav1.ApplyOptions{FieldManager: c.zone, Force: true}) + if err == nil { + klog.V(5).Infof("Patched the status of ANP %s with condition type %s/%s, reason %s, message: %s", + anpName, newCondition.Type, newCondition.Status, newCondition.Reason, newCondition.Message) + } return err } @@ -125,8 +145,6 @@ func (c *Controller) updateBANPStatusToReady(banpName string) error { if err != nil { return fmt.Errorf("unable to update the status of BANP %s, err: %v", banpName, err) } - klog.V(5).Infof("Patched the status of BANP %v with condition type %v/%v", - banpName, policyReadyStatusType+c.zone, metav1.ConditionTrue) return nil } @@ -146,8 +164,6 @@ func (c *Controller) updateBANPStatusToNotReady(banpName, message string) error if err != nil { return fmt.Errorf("unable update the status of BANP %s, err: %v", banpName, err) } - klog.V(3).Infof("Patched the status of BANP %v with condition type %v/%v and reason %s", - banpName, policyReadyStatusType+c.zone, metav1.ConditionFalse, policyNotReadyReason) return nil } @@ -157,6 +173,10 @@ func (c *Controller) updateBANPZoneStatusCondition(newCondition metav1.Condition return err } existingCondition := meta.FindStatusCondition(banp.Status.Conditions, newCondition.Type) + if !doesStatusNeedAnUpdate(existingCondition, newCondition) { + // status is already in the desired state, skip the update to reduce API server load + return nil + } if existingCondition == nil { newCondition.LastTransitionTime = metav1.NewTime(time.Now()) } else { @@ -172,5 +192,9 @@ func (c *Controller) updateBANPZoneStatusCondition(newCondition metav1.Condition WithStatus(anpapiapply.BaselineAdminNetworkPolicyStatus().WithConditions(newCondition)) _, err = c.anpClientSet.PolicyV1alpha1().BaselineAdminNetworkPolicies(). 
ApplyStatus(context.TODO(), applyObj, metav1.ApplyOptions{FieldManager: c.zone, Force: true}) + if err == nil { + klog.V(5).Infof("Patched the status of BANP %s with condition type %s/%s, reason %s, message: %s", + banpName, newCondition.Type, newCondition.Status, newCondition.Reason, newCondition.Message) + } return err } diff --git a/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go b/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go index 6a28fa60d3..02d2ece268 100644 --- a/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go +++ b/go-controller/pkg/ovn/controller/admin_network_policy/status_test.go @@ -147,6 +147,177 @@ func newANPControllerWithDBSetup(dbSetup libovsdbtest.TestSetup, initANPs anpapi return controller, nil } +func TestDoesStatusNeedAnUpdate(t *testing.T) { + tests := []struct { + name string + existingCondition *metav1.Condition + newCondition metav1.Condition + expectedResult bool + }{ + { + name: "nil existing condition should need update", + existingCondition: nil, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: true, + }, + { + name: "same status, reason, message should not need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: false, + }, + { + name: "different status should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionFalse, + Reason: "SetupFailed", + Message: "error", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "success", + }, + expectedResult: true, + }, + { + name: "different reason should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "OldReason", + Message: "success", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "NewReason", + Message: "success", + }, + expectedResult: true, + }, + { + name: "different message should need update", + existingCondition: &metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "old message", + }, + newCondition: metav1.Condition{ + Type: "Ready-In-Zone-test", + Status: metav1.ConditionTrue, + Reason: "SetupSucceeded", + Message: "new message", + }, + expectedResult: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := doesStatusNeedAnUpdate(tt.existingCondition, tt.newCondition) + if result != tt.expectedResult { + t.Errorf("doesStatusNeedAnUpdate() = %v, want %v", result, tt.expectedResult) + } + }) + } +} + +func TestStatusUpdateSkippedWhenUnchanged(t *testing.T) { + g := gomega.NewGomegaWithT(t) + controller, err := newANPController( + anpapi.AdminNetworkPolicyList{ + Items: []anpapi.AdminNetworkPolicy{initialANP}, + }, + anpapi.BaselineAdminNetworkPolicyList{ + Items: []anpapi.BaselineAdminNetworkPolicy{initialBANP}, + }, + ) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // First call - should make an API call to set status to Ready + err = 
controller.updateANPStatusToReady(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for the status to be reflected in the lister + g.Eventually(func() int { + latestANP, err := controller.anpLister.Get(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + return len(latestANP.Status.Conditions) + }).Should(gomega.Equal(1)) + + // Get the number of actions after first update + actionsAfterANPFirstUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + + // Second call with same status - should NOT make an API call + err = controller.updateANPStatusToReady(initialANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify no new actions were added (ApplyStatus was skipped) + actionsAfterANPSecondUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterANPSecondUpdate).To(gomega.Equal(actionsAfterANPFirstUpdate), + "Expected no new API calls when status is unchanged, but got %d new actions", + actionsAfterANPSecondUpdate-actionsAfterANPFirstUpdate) + + // Third call with different status (NotReady) - SHOULD make an API call + err = controller.updateANPStatusToNotReady(initialANP.Name, "something went wrong") + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify a new action WAS added (ApplyStatus was called) + actionsAfterANPThirdUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterANPThirdUpdate).To(gomega.Equal(actionsAfterANPFirstUpdate+1), + "Expected 1 new API call when status changed to NotReady, but got %d new actions", + actionsAfterANPThirdUpdate-actionsAfterANPSecondUpdate) + + // Now test BANP + err = controller.updateBANPStatusToReady(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + g.Eventually(func() int { + latestBANP, err := controller.banpLister.Get(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + return len(latestBANP.Status.Conditions) + }).Should(gomega.Equal(1)) + + actionsAfterBANPFirstUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + + // Second call with same status - should NOT make an API call + err = controller.updateBANPStatusToReady(initialBANP.Name) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + actionsAfterBANPSecondUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterBANPSecondUpdate).To(gomega.Equal(actionsAfterBANPFirstUpdate), + "Expected no new API calls when BANP status is unchanged") + + // Third call with different status (NotReady) - SHOULD make an API call + err = controller.updateBANPStatusToNotReady(initialBANP.Name, "something went wrong") + g.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify a new action WAS added (ApplyStatus was called) + actionsAfterBANPThirdUpdate := len(controller.anpClientSet.(*anpfake.Clientset).Actions()) + g.Expect(actionsAfterBANPThirdUpdate).To(gomega.Equal(actionsAfterBANPFirstUpdate+1), + "Expected 1 new API call when BANP status changed to NotReady, but got %d new actions", + actionsAfterBANPThirdUpdate-actionsAfterBANPSecondUpdate) +} + func TestAddOrUpdateAdminNetworkPolicyStatus(t *testing.T) { anpName := "harry-potter" banpName := "jon-snow" diff --git a/go-controller/pkg/ovn/controller/apbroute/repair.go b/go-controller/pkg/ovn/controller/apbroute/repair.go index 75c50765b1..56867f82e5 100644 --- a/go-controller/pkg/ovn/controller/apbroute/repair.go +++ b/go-controller/pkg/ovn/controller/apbroute/repair.go @@ -155,7 +155,7 @@ func (c *ExternalGatewayMasterController) 
Repair() error { // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode if !podHasAnyECMPRoutes { for _, ovnRoute := range ovnRoutes { - node := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) + node := util.GetWorkerFromGatewayRouter(ovnRoute.router) if err := c.nbClient.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { return fmt.Errorf("error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", podIP, node, err) diff --git a/go-controller/pkg/ovn/controller/networkconnect/controller_test.go b/go-controller/pkg/ovn/controller/networkconnect/controller_test.go index 45884776fe..f683a38e23 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/controller_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/controller_test.go @@ -144,6 +144,7 @@ func createTestNode(n testNode) *corev1.Node { if len(n.nodeSubnets) > 0 { annotations[ovnNodeSubnetsAnnotation] = buildNodeSubnetAnnotation(n.nodeSubnets) } + annotations[util.OvnNodeChassisID] = chassisIDForNode(n.name) return &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ diff --git a/go-controller/pkg/ovn/controller/networkconnect/topology.go b/go-controller/pkg/ovn/controller/networkconnect/topology.go index 2b54570ce4..471fc0d93b 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/topology.go +++ b/go-controller/pkg/ovn/controller/networkconnect/topology.go @@ -557,8 +557,15 @@ func (c *Controller) ensureConnectPortsOps(ops []ovsdb.Operation, cnc *networkco } else { // Remote node: create only the connect-router side port with requested-chassis set // This makes the port type: remote in SB, enabling cross-zone tunneling + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + return nil, ovntypes.NewSuppressedError(err) + } + return nil, fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } ops, err = c.createRouterPortOps(ops, connectRouterName, connectPortName, portPairInfo.connectPortIPs, - "", cncName, networkID, nodeID, tunnelKey, node.Name) + "", cncName, networkID, nodeID, tunnelKey, chassisID) if err != nil { return nil, fmt.Errorf("failed to create remote connect router port ops %s: %v", connectPortName, err) } diff --git a/go-controller/pkg/ovn/controller/networkconnect/topology_test.go b/go-controller/pkg/ovn/controller/networkconnect/topology_test.go index ddb5ad624e..b182f4099e 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/topology_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/topology_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/onsi/gomega" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -32,6 +33,10 @@ import ( mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks/multinetwork" ) +func chassisIDForNode(nodeName string) string { + return uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() +} + type testNetworkManager struct { networkmanager.FakeNetworkManager nodeHas map[string]bool @@ -377,7 +382,7 @@ func TestCreateRouterPortOps(t *testing.T) { networkID: 1, nodeID: 2, tunnelKey: 101, - remoteChassisName: "node2", + remoteChassisName: chassisIDForNode("node2"), initialDB: []libovsdbtest.TestData{ &nbdb.LogicalRouter{ UUID: "router-uuid", @@ -555,6 +560,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node1", Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: 
chassisIDForNode("node1"), util.OvnNodeZoneName: "node1", // local zone }, }, @@ -587,6 +593,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node2", // remote node Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + util.OvnNodeChassisID: chassisIDForNode("node2"), util.OvnNodeZoneName: "node2", // different zone }, }, @@ -619,6 +626,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node1", // local node Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), util.OvnNodeZoneName: "node1", // local zone }, }, @@ -628,6 +636,7 @@ func TestEnsureConnectPortsOps(t *testing.T) { Name: "node2", // remote node Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + util.OvnNodeChassisID: chassisIDForNode("node2"), util.OvnNodeZoneName: "node2", // different zone }, }, @@ -833,7 +842,7 @@ func TestCleanupNetworkConnections(t *testing.T) { libovsdbops.RouterNameKey.String(): "connect_router_test-cnc", }, Options: map[string]string{ - libovsdbops.RequestedChassis: "node2", + libovsdbops.RequestedChassis: chassisIDForNode("node2"), }, // Remote port has no peer }, @@ -916,6 +925,7 @@ func TestSyncNetworkConnectionsInactiveNetwork(t *testing.T) { Annotations: map[string]string{ util.OvnNodeZoneName: "zone1", util.OvnNodeID: "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), "k8s.ovn.org/node-subnets": string(subnetsBytes), }, }, @@ -1496,6 +1506,7 @@ func TestEnsureRoutingPoliciesOps(t *testing.T) { Name: "node1", Annotations: map[string]string{ "k8s.ovn.org/node-id": "1", + util.OvnNodeChassisID: chassisIDForNode("node1"), }, }, }, @@ -1504,6 +1515,7 @@ func TestEnsureRoutingPoliciesOps(t *testing.T) { Name: "node2", Annotations: map[string]string{ "k8s.ovn.org/node-id": "2", + util.OvnNodeChassisID: chassisIDForNode("node2"), }, }, }, diff --git a/go-controller/pkg/ovn/controller/services/node_tracker.go b/go-controller/pkg/ovn/controller/services/node_tracker.go index 341764904a..7079ba2081 100644 --- a/go-controller/pkg/ovn/controller/services/node_tracker.go +++ b/go-controller/pkg/ovn/controller/services/node_tracker.go @@ -56,10 +56,6 @@ type nodeInfo struct { // The node's zone zone string - /** HACK BEGIN **/ - // has the node migrated to remote? - migrated bool - /** HACK END **/ // The list of node's management IPs mgmtIPs []net.IP @@ -127,7 +123,6 @@ func (nt *nodeTracker) Start(nodeInformer coreinformers.NodeInformer) (cache.Res oldObj.Name != newObj.Name || util.NodeHostCIDRsAnnotationChanged(oldObj, newObj) || util.NodeZoneAnnotationChanged(oldObj, newObj) || - util.NodeMigratedZoneAnnotationChanged(oldObj, newObj) || util.NoHostSubnet(oldObj) != util.NoHostSubnet(newObj) { nt.updateNode(newObj) } @@ -154,7 +149,7 @@ func (nt *nodeTracker) Start(nodeInformer coreinformers.NodeInformer) (cache.Res // updateNodeInfo updates the node info cache, and syncs all services // if it changed. 
-func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet, mgmtIPs []net.IP, zone string, nodePortDisabled, migrated bool) { +func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisID string, l3gatewayAddresses, hostAddresses []net.IP, podSubnets []*net.IPNet, mgmtIPs []net.IP, zone string, nodePortDisabled bool) { ni := nodeInfo{ name: nodeName, l3gatewayAddresses: l3gatewayAddresses, @@ -166,7 +161,6 @@ func (nt *nodeTracker) updateNodeInfo(nodeName, switchName, routerName, chassisI chassisID: chassisID, nodePortDisabled: nodePortDisabled, zone: zone, - migrated: migrated, } for i := range podSubnets { ni.podSubnets = append(ni.podSubnets, *podSubnets[i]) // de-pointer @@ -275,7 +269,6 @@ func (nt *nodeTracker) updateNode(node *corev1.Node) { mgmtIPs, util.GetNodeZone(node), !nodePortEnabled, - util.HasNodeMigratedZone(node), ) } @@ -285,24 +278,6 @@ func (nt *nodeTracker) updateNode(node *corev1.Node) { func (nt *nodeTracker) getZoneNodes() []nodeInfo { out := make([]nodeInfo, 0, len(nt.nodes)) for _, node := range nt.nodes { - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. 
Legacy ovnkube-master must not delete the service load - // balancers for this node till it has finished migration - if nt.zone == types.OvnDefaultZone { - if !node.migrated { - out = append(out, node) - } - continue - } - /** HACK END **/ if node.zone == nt.zone { out = append(out, node) } diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index dd27486c5b..5e850fef14 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -764,14 +764,14 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.PodType: pod, ok := obj.(*corev1.Pod) if !ok { - return fmt.Errorf("could not cast %T object to *knet.Pod", obj) + return fmt.Errorf("could not cast %T object to *corev1.Pod", obj) } return h.oc.ensurePod(nil, pod, true) case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast %T object to *kapi.Node", obj) + return fmt.Errorf("could not cast %T object to *corev1.Node", obj) } if config.HybridOverlay.Enabled { if util.NoHostSubnet(node) { @@ -895,7 +895,7 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.NamespaceType: ns, ok := obj.(*corev1.Namespace) if !ok { - return fmt.Errorf("could not cast %T object to *kapi.Namespace", obj) + return fmt.Errorf("could not cast %T object to *corev1.Namespace", obj) } return h.oc.AddNamespace(ns) @@ -919,11 +919,11 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int case factory.NodeType: newNode, ok := newObj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast newObj of type %T to *kapi.Node", newObj) + return fmt.Errorf("could not cast newObj of type %T to *corev1.Node", newObj) } oldNode, ok := oldObj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast oldObj of type %T to *kapi.Node", oldObj) + return fmt.Errorf("could not cast oldObj of type %T to *corev1.Node", oldObj) } var switchToOvnNode bool if config.HybridOverlay.Enabled { @@ -1125,7 +1125,7 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("could not cast obj of type %T to *knet.Node", obj) + return fmt.Errorf("could not cast obj of type %T to *corev1.Node", obj) } return h.oc.deleteNodeEvent(node) diff --git a/go-controller/pkg/ovn/egressgw_test.go b/go-controller/pkg/ovn/egressgw_test.go index 9b6f4810eb..1e93c4fa6d 100644 --- a/go-controller/pkg/ovn/egressgw_test.go +++ b/go-controller/pkg/ovn/egressgw_test.go @@ -136,7 +136,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -172,7 +172,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -276,7 +276,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + 
libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -312,7 +312,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -420,7 +420,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -466,7 +466,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -898,7 +898,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -969,7 +969,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1079,7 +1079,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1119,7 +1119,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1240,7 +1240,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1280,7 +1280,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1411,7 +1411,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1451,7 +1451,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ 
"iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1592,7 +1592,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1632,7 +1632,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1665,7 +1665,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1711,7 +1711,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1745,7 +1745,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1785,7 +1785,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1939,7 +1939,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1979,7 +1979,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2013,7 +2013,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2053,7 +2053,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2086,7 +2086,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ 
"iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2132,7 +2132,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2165,7 +2165,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2211,7 +2211,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2245,7 +2245,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2285,7 +2285,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2330,7 +2330,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2370,7 +2370,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - "requested-chassis": "node1", + "requested-chassis": chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2484,7 +2484,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2618,7 +2618,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2759,7 +2759,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2951,7 +2951,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: 
chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, @@ -3133,7 +3133,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, @@ -3704,7 +3704,7 @@ func injectNode(fakeOvn *FakeOVN) { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{"k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"169.254.33.2/24", "next-hop":"169.254.33.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("node1"), "k8s.ovn.org/node-subnets": `{"default":"10.128.1.0/24"}`, }, }, diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 75662bd2c4..41f2e9a6af 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1174,20 +1174,6 @@ func (e *EgressIPController) isPodScheduledinLocalZone(pod *corev1.Pod) bool { // isLocalZoneNode returns true if the node is part of the local zone. func (e *EgressIPController) isLocalZoneNode(node *corev1.Node) bool { - /** HACK BEGIN **/ - // TODO(tssurya): Remove this HACK a few months from now. This has been added only to - // minimize disruption for upgrades when moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. 
- if e.zone == types.OvnDefaultZone { - return !util.HasNodeMigratedZone(node) - } - /** HACK END **/ return util.GetNodeZone(node) == e.zone } diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 07d251bee3..bd4402eea0 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/google/uuid" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" "github.com/urfave/cli/v2" @@ -163,9 +164,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s\"}", ni.transitPortIP), // used only for ic=true test "k8s.ovn.org/zone-name": ni.zone, } - if ni.zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = "" - } nodes = append(nodes, getNodeObj(fmt.Sprintf("node%d", nodeSuffix), annotations, map[string]string{})) nodeSuffix = nodeSuffix + 1 } @@ -195,9 +193,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv6\":\"%s\"}", ni.transitPortIP), // used only for ic=true test "k8s.ovn.org/zone-name": ni.zone, } - if ni.zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = "" - } nodes = append(nodes, getNodeObj(fmt.Sprintf("node%d", nodeSuffix), annotations, map[string]string{})) nodeSuffix = nodeSuffix + 1 } @@ -1816,12 +1811,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/egress-assignable": "", } node2 := nodes[1] - if node1Zone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } - if node2Zone != "global" { - node2.Annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } + egressPod := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(eipNamespace) @@ -2623,9 +2613,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node1Zone, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4), } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2637,9 +2624,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4), } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } labels = map[string]string{} node2 := getNodeObj(node2Name, annotations, labels) @@ -3430,7 +3414,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\"}", v4Node1Subnet, v6Node1Subnet), "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"100.88.0.2/16\", \"ipv6\": \"fd97::2/64\"}", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4), - "k8s.ovn.org/zone-name": node1Name, + "k8s.ovn.org/zone-name": "global", } node := getNodeObj(node1Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch fakeOvn.startWithDBSetup( @@ -3619,7 +3603,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster 
default network" } if !isnode1Local { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" // used only for ic=true test } node1 := getNodeObj(node1Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch @@ -3636,7 +3619,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" if !isnode2Local { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" // used only for ic=true test } node2 := getNodeObj(node2Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch dynamicNeighRouters := "true" @@ -4777,9 +4759,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod lives on node 1, therefore set the zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:feff:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) dynamicNeighRouters := "true" @@ -5015,9 +4994,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod is host by node 1 therefore we set its zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:fedf:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) dynamicNeighRouters := "true" @@ -5272,9 +5248,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" // pod is hosted by node 1 therefore we set its zone node1 := newNodeGlobalZoneNotEgressableV6Only(node1Name, "0:0:0:0:0:feff:c0a8:8e0c/64") node1.Annotations["k8s.ovn.org/zone-name"] = podZone - if podZone != "global" { - node1.Annotations["k8s.ovn.org/remote-zone-migrated"] = podZone // used only for ic=true test - } _, node1Subnet, _ := net.ParseCIDR(v6Node1Subnet) _, node2Subnet, _ := net.ParseCIDR(v6Node2Subnet) egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets(nil, types.DefaultNetworkName, DefaultNetworkControllerName) @@ -5602,9 +5575,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -5617,9 +5587,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } node2 := getNodeObj(node2Name, annotations, labels) _, node2Subnet, _ := net.ParseCIDR(v4Node2Subnet) @@ -7076,7 +7043,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: 
node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -7429,7 +7396,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -7534,9 +7501,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), } - if node1Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node1Zone // used only for ic=true test - } labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -7550,9 +7514,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), } - if node2Zone != "global" { - annotations["k8s.ovn.org/remote-zone-migrated"] = node2Zone // used only for ic=true test - } node2 := getNodeObj(node2Name, annotations, map[string]string{}) eIP1 := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -7856,7 +7817,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "namespace": egressPod1.Namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: egressPod1.Spec.NodeName, + libovsdbops.RequestedChassis: node1.Annotations[util.OvnNodeChassisID], "iface-id-ver": egressPod1.Name, }, PortSecurity: []string{podAddr}, @@ -11676,7 +11637,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" } if isPodRemote { annotations["k8s.ovn.org/zone-name"] = "remote" - annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" } node2 := getNodeObj(node2Name, annotations, map[string]string{}) @@ -15722,10 +15682,20 @@ func getReRouteStaticRoute(clusterSubnet, nextHop string) *nbdb.LogicalRouterSta } func getNodeObj(nodeName string, annotations, labels map[string]string) corev1.Node { + nodeAnnotations := map[string]string{} + if annotations != nil { + nodeAnnotations = make(map[string]string, len(annotations)+1) + for k, v := range annotations { + nodeAnnotations[k] = v + } + } + if _, ok := nodeAnnotations[util.OvnNodeChassisID]; !ok { + nodeAnnotations[util.OvnNodeChassisID] = uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() + } return corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, - Annotations: annotations, + Annotations: nodeAnnotations, Labels: labels, }, Status: corev1.NodeStatus{ diff --git a/go-controller/pkg/ovn/egressip_udn_l2_test.go b/go-controller/pkg/ovn/egressip_udn_l2_test.go index a31dcac086..1b4d845800 100644 --- a/go-controller/pkg/ovn/egressip_udn_l2_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l2_test.go @@ -550,7 +550,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: 
fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -567,7 +566,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1082,7 +1080,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1099,7 +1096,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1588,7 +1584,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1605,7 +1600,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1968,7 +1962,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), 
util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -1985,7 +1978,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2337,7 +2329,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2354,7 +2345,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2723,7 +2713,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OvnNodeID: "1", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, @@ -2740,7 +2729,6 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, "k8s.ovn.org/node-chassis-id": "473ca66d-d800-472f-b289-1ab81ae7f21c", - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OvnNodeID: "2", util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeL3GatewayConfig: fmt.Sprintf(`{"%s":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"%s", "next-hops": ["%s"]}, diff --git a/go-controller/pkg/ovn/egressip_udn_l3_test.go b/go-controller/pkg/ovn/egressip_udn_l3_test.go index 3137457a43..a8a50a3724 100644 --- a/go-controller/pkg/ovn/egressip_udn_l3_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l3_test.go @@ -41,26 +41,25 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol ) const ( - nadName1 = "nad1" - networkName1 = "network1" - 
networkName1_ = networkName1 + "_" - node1Name = "node1" - v4Net1 = "20.128.0.0/14" - v4Node1Net1 = "20.128.0.0/16" - v4Pod1IPNode1Net1 = "20.128.0.5" - podName3 = "egress-pod3" - v4Pod2IPNode1Net1 = "20.128.0.6" - v4Node1Tsp = "100.88.0.2" - node2Name = "node2" - v4Node2Net1 = "20.129.0.0/16" - v4Node2Tsp = "100.88.0.3" - podName4 = "egress-pod4" - v4Pod1IPNode2Net1 = "20.129.0.2" - v4Pod2IPNode2Net1 = "20.129.0.3" - eIP1Mark = 50000 - eIP2Mark = 50001 - userDefinedNetworkID = "2" - //tnlKey = zoneinterconnect.BaseTransitSwitchTunnelKey + userDefinedNetworkID + nadName1 = "nad1" + networkName1 = "network1" + networkName1_ = networkName1 + "_" + node1Name = "node1" + v4Net1 = "20.128.0.0/14" + v4Node1Net1 = "20.128.0.0/16" + v4Pod1IPNode1Net1 = "20.128.0.5" + podName3 = "egress-pod3" + v4Pod2IPNode1Net1 = "20.128.0.6" + v4Node1Tsp = "100.88.0.2" + node2Name = "node2" + v4Node2Net1 = "20.129.0.0/16" + v4Node2Tsp = "100.88.0.3" + podName4 = "egress-pod4" + v4Pod1IPNode2Net1 = "20.129.0.2" + v4Pod2IPNode2Net1 = "20.129.0.3" + eIP1Mark = 50000 + eIP2Mark = 50001 + // tnlKey = zoneinterconnect.BaseTransitSwitchTunnelKey + userDefinedNetworkID tnlKey = "16711685" ) @@ -169,6 +168,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -181,6 +181,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMetaWithMark(egressIPName, eIP1Mark), @@ -262,7 +263,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{}, }, } @@ -474,7 +475,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -543,10 +544,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", 
node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -556,10 +557,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -639,7 +640,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -852,7 +853,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -990,7 +991,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -1064,10 +1065,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -1077,11 +1078,33 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, 
v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) + gwConfig, err := util.ParseNodeL3GatewayAnnotation(&node1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + rtosPortName := "rtos-" + networkName1_ + node1Name + rtosPortUUID := rtosPortName + "-UUID" + rtosChassisName := rtosPortName + "-" + node1.Annotations[util.OvnNodeChassisID] + rtosChassisUUID := rtosChassisName + "-UUID" + rtosPort := &nbdb.LogicalRouterPort{ + UUID: rtosPortUUID, + Name: rtosPortName, + MAC: util.IPAddrToHWAddr(util.GetNodeGatewayIfAddr(node1UDNSubnet).IP).String(), + Networks: []string{util.GetNodeGatewayIfAddr(node1UDNSubnet).String()}, + Options: map[string]string{ + "gateway_mtu": fmt.Sprintf("%d", config.Default.MTU), + }, + GatewayChassis: []string{rtosChassisUUID}, + } + rtosGatewayChassis := &nbdb.GatewayChassis{ + UUID: rtosChassisUUID, + Name: rtosChassisName, + ChassisName: node1.Annotations[util.OvnNodeChassisID], + Priority: 1, + } twoNodeStatus := []egressipv1.EgressIPStatusItem{ { Node: node1Name, @@ -1160,7 +1183,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -1394,17 +1417,20 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -1437,7 +1463,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + 
"-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -1466,7 +1492,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: "remote", @@ -1496,7 +1522,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateTwoEgressNodes)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseStateTwoEgressNodes)) ginkgo.By("delete EgressIP") err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1646,14 +1672,17 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: secConInfo.bnc.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -1686,7 +1715,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -1715,7 +1744,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: 
"remote", @@ -1745,7 +1774,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseState)) return nil } err := app.Run([]string{app.Name}) @@ -1800,10 +1829,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -1813,10 +1842,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -1896,7 +1925,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2106,7 +2135,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -2115,7 +2144,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol udnEnabledSvcV4, } ginkgo.By("ensure expected equals actual") - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateTwoEgressNodes)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveDataIgnoringUUIDs(expectedDatabaseStateTwoEgressNodes)) return nil } err := app.Run([]string{app.Name}) @@ -2170,10 +2199,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, 
networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2183,10 +2212,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, labels) twoNodeStatus := []egressipv1.EgressIPStatusItem{ { @@ -2266,7 +2295,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2464,7 +2493,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, }, getNoReRouteReplyTrafficPolicyForController(netInfo.GetNetworkName(), DefaultNetworkControllerName), @@ -2531,10 +2560,10 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node1Subnet, networkName1, v4Node1Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node1Tsp), "k8s.ovn.org/zone-name": node1Name, - "k8s.ovn.org/remote-zone-migrated": node1Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), util.OvnNodeID: "2", } + addL3GatewayConfig(node1Annotations, node1IPv4CIDR, "7e:57:f8:f0:3c:49") labels := map[string]string{ "k8s.ovn.org/egress-assignable": "", } @@ -2544,11 +2573,33 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\",\"%s\":\"%s\"}", v4Node2Subnet, networkName1, v4Node2Net1), "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{\"ipv4\":\"%s/16\"}", v4Node2Tsp), "k8s.ovn.org/zone-name": node2Name, - "k8s.ovn.org/remote-zone-migrated": node2Name, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), util.OvnNodeID: "3", } + addL3GatewayConfig(node2Annotations, node2IPv4CIDR, "7e:57:f8:f0:3c:50") node2 := getNodeObj(node2Name, node2Annotations, 
labels) + gwConfig, err := util.ParseNodeL3GatewayAnnotation(&node1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + rtosPortName := "rtos-" + networkName1_ + node1Name + rtosPortUUID := rtosPortName + "-UUID" + rtosChassisName := rtosPortName + "-" + node1.Annotations[util.OvnNodeChassisID] + rtosChassisUUID := rtosChassisName + "-UUID" + rtosPort := &nbdb.LogicalRouterPort{ + UUID: rtosPortUUID, + Name: rtosPortName, + MAC: util.IPAddrToHWAddr(util.GetNodeGatewayIfAddr(node1UDNSubnet).IP).String(), + Networks: []string{util.GetNodeGatewayIfAddr(node1UDNSubnet).String()}, + Options: map[string]string{ + "gateway_mtu": fmt.Sprintf("%d", config.Default.MTU), + }, + GatewayChassis: []string{rtosChassisUUID}, + } + rtosGatewayChassis := &nbdb.GatewayChassis{ + UUID: rtosChassisUUID, + Name: rtosChassisName, + ChassisName: node1.Annotations[util.OvnNodeChassisID], + Priority: 1, + } twoNodeStatus := []egressipv1.EgressIPStatusItem{ { Node: node1Name, @@ -2627,7 +2678,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: networkName1, ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), }, } fakeOvn.startWithDBSetup( @@ -2869,16 +2920,19 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol fmt.Sprintf("static-route-%s-%s-UUID", node2UDNLogicalRouterIPv4[0], v4Node2Tsp), }, Nat: []string{networkName1_ + node1Name + "-masqueradeNAT-UUID"}, - Ports: []string{netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, + Ports: []string{rtosPortUUID, netInfo.GetNetworkScopedName(ovntypes.RouterToTransitSwitchPrefix+node1.Name) + "-UUID"}, }, &nbdb.LogicalRouter{ UUID: netInfo.GetNetworkScopedGWRouterName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedGWRouterName(node1.Name), Ports: []string{ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + networkName1_ + node1.Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: gwRouterExternalIDs(netInfo, *gwConfig), + Options: gwRouterOptions(*gwConfig), Policies: []string{getGWPktMarkLRPUUID(eipNamespace2, podName2, IPFamilyValueV4, netInfo.GetNetworkName()), getGWPktMarkLRPUUID(eipNamespace2, podName4, IPFamilyValueV4, netInfo.GetNetworkName())}, }, + rtosPort, + rtosGatewayChassis, &nbdb.LogicalSwitchPort{ UUID: "k8s-" + networkName1_ + node1Name + "-UUID", Name: "k8s-" + networkName1_ + node1Name, @@ -2911,7 +2965,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol UUID: netInfo.GetNetworkScopedSwitchName(node1.Name) + "-UUID", Name: netInfo.GetNetworkScopedSwitchName(node1.Name), Ports: []string{"k8s-" + networkName1_ + node1Name + "-UUID", "stor-" + networkName1_ + node1Name + "-UUID"}, - ExternalIDs: map[string]string{ovntypes.NetworkExternalID: netInfo.GetNetworkName(), ovntypes.TopologyExternalID: ovntypes.Layer3Topology}, + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(netInfo), QOSRules: []string{fmt.Sprintf("%s-QoS-UUID", netInfo.GetNetworkName())}, OtherConfig: map[string]string{ "exclude_ips": util.GetNodeManagementIfAddr(node1UDNSubnet).IP.String(), @@ -2940,7 
+2994,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol "node": node2.Name, }, Options: map[string]string{ - libovsdbops.RequestedChassis: node2.Name, + libovsdbops.RequestedChassis: node2.Annotations[util.OvnNodeChassisID], libovsdbops.RequestedTnlKey: node2.Annotations[util.OvnNodeID], }, Type: "remote", @@ -2980,6 +3034,14 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol }) }) +func addL3GatewayConfig(annotations map[string]string, nodeIPv4CIDR, mac string) { + annotations["k8s.ovn.org/l3-gateway-config"] = fmt.Sprintf( + `{"default":{"mode":"local","mac-address":%q, "ip-address":%q, "next-hop":"192.168.126.1"}}`, + mac, + nodeIPv4CIDR, + ) +} + // returns the address set with externalID "k8s.ovn.org/name": "egressip-served-pods"" func buildEgressIPServedPodsAddressSetsForController(ips []string, network, controller string) (*nbdb.AddressSet, *nbdb.AddressSet) { dbIDs := getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, network, controller) diff --git a/go-controller/pkg/ovn/egressservices_test.go b/go-controller/pkg/ovn/egressservices_test.go index 25412c13d5..612941a338 100644 --- a/go-controller/pkg/ovn/egressservices_test.go +++ b/go-controller/pkg/ovn/egressservices_test.go @@ -1700,6 +1700,7 @@ func nodeFor(name, ipv4, ipv6, v4subnet, v6subnet, transitIPv4, transitIPv6 stri "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", ipv4, ipv6), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\",\"%s\"]", fmt.Sprintf("%s/24", ipv4), fmt.Sprintf("%s/64", ipv6)), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\",\"%s\"]}", v4subnet, v6subnet), + util.OvnNodeChassisID: chassisIDForNode(name), // Used only with IC tests "k8s.ovn.org/zone-name": name, diff --git a/go-controller/pkg/ovn/external_gateway_apb_test.go b/go-controller/pkg/ovn/external_gateway_apb_test.go index b237174ae0..2e91281041 100644 --- a/go-controller/pkg/ovn/external_gateway_apb_test.go +++ b/go-controller/pkg/ovn/external_gateway_apb_test.go @@ -178,7 +178,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -214,7 +214,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -324,7 +324,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -360,7 +360,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -463,7 +463,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ 
"iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -499,7 +499,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -606,7 +606,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -652,7 +652,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -814,7 +814,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -896,7 +896,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1039,7 +1039,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, }, @@ -1167,7 +1167,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1238,7 +1238,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1339,7 +1339,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1375,7 +1375,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, 
@@ -1481,7 +1481,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1517,7 +1517,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1643,7 +1643,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1679,7 +1679,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1797,7 +1797,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1833,7 +1833,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1860,7 +1860,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1902,7 +1902,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -1992,7 +1992,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2112,7 +2112,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2243,7 +2243,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - 
libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2285,7 +2285,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2341,7 +2341,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { Name: "namespace1_myPod", Options: map[string]string{ "iface-id-ver": "myPod", - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, }, @@ -2539,7 +2539,7 @@ var _ = ginkgo.Describe("OVN for APB External Route Operations", func() { }, Name: "namespace1_myPod", Options: map[string]string{ - libovsdbops.RequestedChassis: "node1", + libovsdbops.RequestedChassis: chassisIDForNode("node1"), "iface-id-ver": "myPod", }, PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, diff --git a/go-controller/pkg/ovn/hybrid.go b/go-controller/pkg/ovn/hybrid.go index 0164cc4076..d9a5610940 100644 --- a/go-controller/pkg/ovn/hybrid.go +++ b/go-controller/pkg/ovn/hybrid.go @@ -279,7 +279,7 @@ func (oc *DefaultNetworkController) setupHybridLRPolicySharedGw(nodeSubnets []*n }, &clusterRouterStaticRoutes.Nexthop); err != nil { return fmt.Errorf("failed to add policy route static '%s %s' for on %s , error: %w", clusterRouterStaticRoutes.IPPrefix, clusterRouterStaticRoutes.Nexthop, - oc.GetNetworkScopedGWRouterName(nodeName), err) + ovntypes.OVNClusterRouter, err) } klog.Infof("Created hybrid overlay logical route static route at cluster router for node %s", nodeName) diff --git a/go-controller/pkg/ovn/kubevirt_test.go b/go-controller/pkg/ovn/kubevirt_test.go index 5f2ce57b3f..9aad857d9c 100644 --- a/go-controller/pkg/ovn/kubevirt_test.go +++ b/go-controller/pkg/ovn/kubevirt_test.go @@ -748,7 +748,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node1].transitSwitchPortIPv4, nodeByName[node1].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node1].subnetIPv4, nodeByName[node1].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:51", "ip-addresses":[%q, %q]}}`, nodeByName[node1].addressIPv4, nodeByName[node1].addressIPv6), - "k8s.ovn.org/node-chassis-id": "1", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node1), util.OvnNodeID: nodeByName[node1].nodeID, }, }, @@ -760,7 +760,7 @@ var _ = Describe("OVN Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node2].transitSwitchPortIPv4, nodeByName[node2].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node2].subnetIPv4, nodeByName[node2].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:52", "ip-addresses":[%q, %q]}}`, nodeByName[node2].addressIPv4, nodeByName[node2].addressIPv6), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node2), util.OvnNodeID: nodeByName[node2].nodeID, }, }, @@ -772,7 +772,7 @@ var _ = Describe("OVN 
Kubevirt Operations", func() { "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf(`{"ipv4": %q, "ipv6": %q}`, nodeByName[node3].transitSwitchPortIPv4, nodeByName[node3].transitSwitchPortIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf(`{"default":[%q,%q]}`, nodeByName[node3].subnetIPv4, nodeByName[node3].subnetIPv6), "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default": {"mode": "local", "mac-address":"7e:57:f8:f0:3c:53", "ip-addresses":[%q, %q]}}`, nodeByName[node3].addressIPv4, nodeByName[node3].addressIPv6), - "k8s.ovn.org/node-chassis-id": "3", + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node3), util.OvnNodeID: nodeByName[node3].nodeID, }, }, diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index ff9ef2a4b4..63f4994cfa 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -817,9 +817,19 @@ func (oc *Layer2UserDefinedNetworkController) addSwitchPortForRemoteNodeGR(node return fmt.Errorf("failed to fetch tunnelID annotation from the node %s for network %s, err: %w", node.Name, oc.GetNetworkName(), err) } + + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } + logicalSwitchPort.Options = map[string]string{ libovsdbops.RequestedTnlKey: strconv.Itoa(tunnelID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, } sw := nbdb.LogicalSwitch{Name: oc.GetNetworkScopedSwitchName(types.OVNLayer2Switch)} err = libovsdbops.CreateOrUpdateLogicalSwitchPortsOnSwitch(oc.nbClient, &sw, &logicalSwitchPort) @@ -889,13 +899,23 @@ func (oc *Layer2UserDefinedNetworkController) addRouterSetupForRemoteNodeGR(node if err != nil { return nil } + + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } + transitPort := nbdb.LogicalRouterPort{ Name: types.TransitRouterToRouterPrefix + oc.GetNetworkScopedGWRouterName(node.Name), MAC: util.IPAddrToHWAddr(transitRouterInfo.transitRouterNets[0].IP).String(), Networks: util.IPNetsToStringSlice(transitRouterInfo.transitRouterNets), Options: map[string]string{ libovsdbops.RequestedTnlKey: getTransitRouterPortTunnelKey(transitRouterInfo.nodeID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, }, ExternalIDs: map[string]string{ types.NetworkExternalID: oc.GetNetworkName(), @@ -992,10 +1012,12 @@ func (oc *Layer2UserDefinedNetworkController) cleanupRouterSetupForRemoteNodeGR( func (oc *Layer2UserDefinedNetworkController) deleteNodeEvent(node *corev1.Node) error { if _, local := oc.localZoneNodes.Load(node.Name); local { - if err := oc.gatewayManagerForNode(node.Name).Cleanup(); err != nil { - return fmt.Errorf("failed to cleanup gateway on node %q: %w", node.Name, err) + if util.IsNetworkSegmentationSupportEnabled() && oc.IsPrimaryNetwork() { + if err := oc.gatewayManagerForNode(node.Name).Cleanup(); err != nil { + return fmt.Errorf("failed to cleanup gateway on node %q: %w", node.Name, err) 
+ } + oc.gatewayManagers.Delete(node.Name) } - oc.gatewayManagers.Delete(node.Name) } else { if config.Layer2UsesTransitRouter { // this is a no-op for local nodes diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go index 947dfbfea9..7461784139 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go @@ -135,7 +135,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, []testPod{podInfo}, expectationOptions..., - ).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...)) + ).expectedLogicalSwitchesAndPorts()...)) return nil } @@ -266,7 +266,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, []testPod{sourcePodInfo}, expectationOptions..., - ).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...)) + ).expectedLogicalSwitchesAndPorts()...)) targetPodInfo := dummyL2TestPod(ns, netInfo, targetPodInfoIdx, userDefinedNetworkIdx) targetKvPod := newMultiHomedKubevirtPod( @@ -293,7 +293,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { fakeOvn, testPods, expectationOptions..., - ).expectedLogicalSwitchesAndPortsWithLspEnabled(netInfo.isPrimary, expectedPodLspEnabled)...)) + ).expectedLogicalSwitchesAndPortsWithLspEnabled(expectedPodLspEnabled)...)) return nil } @@ -365,13 +365,8 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { "user-defined network controller DB entities are properly cleaned up", func(netInfo userDefinedNetInfo, testConfig testConfiguration) { podInfo := dummyTestPod(ns, netInfo) - if testConfig.configToOverride != nil { - config.OVNKubernetesFeature = *testConfig.configToOverride - if testConfig.gatewayConfig != nil { - config.Gateway.DisableSNATMultipleGWs = testConfig.gatewayConfig.DisableSNATMultipleGWs - } - config.OVNKubernetesFeature.EnableMultiNetwork = true - } + setupConfig(netInfo, testConfig, config.GatewayModeShared) + config.OVNKubernetesFeature.EnableMultiNetwork = true app.Action = func(*cli.Context) error { netConf := netInfo.netconf() networkConfig, err := util.NewNetInfo(netConf) @@ -395,7 +390,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { gwConfig, err := util.ParseNodeL3GatewayAnnotation(testNode) Expect(err).NotTo(HaveOccurred()) Expect(gwConfig.NextHops).NotTo(BeEmpty()) - nbZone := &nbdb.NBGlobal{Name: ovntypes.OvnDefaultZone, UUID: ovntypes.OvnDefaultZone} + nbZone := &nbdb.NBGlobal{Name: config.Default.Zone, UUID: config.Default.Zone} n := newNamespace(ns) if netInfo.isPrimary { @@ -1040,6 +1035,7 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC } if staleNode { staleNodeName := "stale-node" + staleNodeChassisID := chassisIDForNode("stale-node") // create remote router port remoteRouterName := fmt.Sprintf("GR_%s_%s", netInfo.GetNetworkName(), staleNodeName) remotePortName := fmt.Sprintf("%s%s", ovntypes.TransitRouterToRouterPrefix, remoteRouterName) @@ -1054,7 +1050,7 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC MAC: util.IPAddrToHWAddr(remoteTRInfo.transitRouterNets[0].IP).String(), Options: map[string]string{ libovsdbops.RequestedTnlKey: "15", // as defined by getTransitRouterPortTunnelKey(nodeID) - libovsdbops.RequestedChassis: staleNodeName}, + libovsdbops.RequestedChassis: staleNodeChassisID}, 
ExternalIDs: externalIDs, } expectedEntities = append(expectedEntities, remotePort) diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index ff9da8b800..24ccf96a5f 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -1048,8 +1048,8 @@ func (oc *Layer3UserDefinedNetworkController) syncNodes(nodes []interface{}) err } if config.OVNKubernetesFeature.EnableInterconnect { - if err := oc.zoneICHandler.SyncNodes(activeNodes); err != nil { - return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + if err := oc.zoneICHandler.CleanupStaleNodes(activeNodes); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup stale nodes: error: %w", err) } } diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go index 52c3aab0d6..ed70df467f 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go @@ -253,7 +253,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { fakeOvn, []testPod{podInfo}, expectationOptions..., - ).expectedLogicalSwitchesAndPorts(netInfo.isPrimary)...))) + ).expectedLogicalSwitchesAndPorts()...))) return nil } @@ -841,10 +841,6 @@ func makeCUDNOwnerRef(name string) metav1.OwnerReference { } } -func (sni *userDefinedNetInfo) getNetworkRole() string { - return util.GetUserDefinedNetworkRole(sni.isPrimary) -} - func getNetworkRole(netInfo util.NetInfo) string { return util.GetUserDefinedNetworkRole(netInfo.IsPrimaryNetwork()) } @@ -855,10 +851,7 @@ func (sni *userDefinedNetInfo) setupOVNDependencies(dbData *libovsdbtest.TestSet return err } - externalIDs := map[string]string{ - types.NetworkExternalID: sni.netName, - types.NetworkRoleExternalID: sni.getNetworkRole(), - } + externalIDs := util.GenerateExternalIDsForSwitchOrRouter(netInfo) switch sni.topology { case types.Layer2Topology: dbData.NBData = append(dbData.NBData, &nbdb.LogicalSwitch{ @@ -1010,7 +1003,7 @@ func newNodeWithUserDefinedNetworks(nodeName string, nodeIPv4CIDR string, netInf util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4CIDR), "k8s.ovn.org/zone-name": zone, "k8s.ovn.org/l3-gateway-config": fmt.Sprintf("{\"default\":{\"mode\":\"shared\",\"bridge-id\":\"breth0\",\"interface-id\":\"breth0_ovn-worker\",\"mac-address\":%q,\"ip-addresses\":[%[2]q],\"ip-address\":%[2]q,\"next-hops\":[%[3]q],\"next-hop\":%[3]q,\"node-port-enable\":\"true\",\"vlan-id\":\"0\"}}", util.IPAddrToHWAddr(nodeIP), nodeCIDR, nextHopIP), - util.OvnNodeChassisID: "abdcef", + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/network-ids": fmt.Sprintf("{\"default\":\"0\",\"isolatednet\":\"%s\"}", userDefinedNetworkID), util.OvnNodeID: "4", "k8s.ovn.org/udn-layer2-node-gateway-router-lrp-tunnel-ids": "{\"isolatednet\":\"25\"}", diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 3fe803660e..d969e73d17 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -276,17 +276,17 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { return } - localZoneNodeNames := make([]string, 0, len(kNodes)) - remoteZoneNodeNames := make([]string, 0, len(kNodes)) + localZoneNodes := make([]*corev1.Node, 0, len(kNodes)) + remoteZoneNodes := 
make([]*corev1.Node, 0, len(kNodes)) for i := range kNodes { if oc.isLocalZoneNode(kNodes[i]) { - localZoneNodeNames = append(localZoneNodeNames, kNodes[i].Name) + localZoneNodes = append(localZoneNodes, kNodes[i]) } else { - remoteZoneNodeNames = append(remoteZoneNodeNames, kNodes[i].Name) + remoteZoneNodes = append(remoteZoneNodes, kNodes[i]) } } - if err := oc.syncChassis(localZoneNodeNames, remoteZoneNodeNames); err != nil { + if err := oc.syncChassis(localZoneNodes, remoteZoneNodes); err != nil { klog.Errorf("Failed to sync chassis: error: %v", err) } } @@ -297,8 +297,8 @@ func (oc *DefaultNetworkController) syncNodesPeriodic() { // do not want to delete. func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { foundNodes := sets.New[string]() - localZoneNodeNames := make([]string, 0, len(kNodes)) - remoteZoneKNodeNames := make([]string, 0, len(kNodes)) + localZoneNodes := make([]*corev1.Node, 0, len(kNodes)) + remoteZoneNodes := make([]*corev1.Node, 0, len(kNodes)) for _, tmp := range kNodes { node, ok := tmp.(*corev1.Node) if !ok { @@ -313,9 +313,9 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if oc.isLocalZoneNode(node) { foundNodes.Insert(node.Name) oc.localZoneNodes.Store(node.Name, true) - localZoneNodeNames = append(localZoneNodeNames, node.Name) + localZoneNodes = append(localZoneNodes, node) } else { - remoteZoneKNodeNames = append(remoteZoneKNodeNames, node.Name) + remoteZoneNodes = append(remoteZoneNodes, node) } } @@ -359,7 +359,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { if ok { return false } - nodeName := strings.TrimPrefix(item.Name, types.GWRouterPrefix) + nodeName := util.GetWorkerFromGatewayRouter(item.Name) if nodeName != item.Name && len(nodeName) > 0 && !foundNodes.Has(nodeName) { staleSwitches.Insert(nodeName) return true @@ -378,17 +378,28 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { } } - if err := oc.syncChassis(localZoneNodeNames, remoteZoneKNodeNames); err != nil { + if err := oc.syncChassis(localZoneNodes, remoteZoneNodes); err != nil { return fmt.Errorf("failed to sync chassis: error: %v", err) } if config.OVNKubernetesFeature.EnableInterconnect { + // Chassis cleanup should happen regardless of transport mode to cleanup + // any stale remote chassis entries (e.g., from overlay->no-overlay migration) if err := oc.zoneChassisHandler.SyncNodes(kNodes); err != nil { return fmt.Errorf("zoneChassisHandler failed to sync nodes: error: %w", err) } - if err := oc.zoneICHandler.SyncNodes(kNodes); err != nil { - return fmt.Errorf("zoneICHandler failed to sync nodes: error: %w", err) + // Interconnect resource sync depends on transport mode: + // - For overlay: ensure transit switch exists and cleanup stale resources + // - For no-overlay: cleanup all interconnect resources (nodes and transit switch) + if oc.Transport() == types.NetworkTransportNoOverlay { + if err := oc.zoneICHandler.Cleanup(); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup: error: %w", err) + } + } else { + if err := oc.zoneICHandler.CleanupStaleNodes(kNodes); err != nil { + return fmt.Errorf("zoneICHandler failed to cleanup stale nodes: error: %w", err) + } } } @@ -397,7 +408,7 @@ func (oc *DefaultNetworkController) syncNodes(kNodes []interface{}) error { // Cleanup stale chassis and chassis template variables with no // corresponding nodes. 
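The syncChassis rework below stops keying chassis records by Hostname (the old chassisHostNameMap) and instead matches Chassis.Name against each node's node-chassis-id annotation, which keeps working when hostname and chassis name diverge, as on the DPU nodes covered in master_test.go or after an overlay to no-overlay migration. A hypothetical distillation of just the matching step; the real function additionally tracks ChassisTemplateVar cleanup and treats local and remote zone nodes differently:

package sketch

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/sbdb"
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
)

// staleChassis keeps every chassis whose Name is not claimed by some node's
// chassis-id annotation; whatever remains unmatched is considered stale.
func staleChassis(chassisList []*sbdb.Chassis, nodes []*corev1.Node) []*sbdb.Chassis {
	chassisNameMap := map[string]*sbdb.Chassis{}
	for _, chassis := range chassisList {
		chassisNameMap[chassis.Name] = chassis
	}
	for _, node := range nodes {
		chassisID, err := util.ParseNodeChassisIDAnnotation(node)
		if err != nil {
			// No chassis-id annotation: this node's chassis cannot be
			// identified, so its record may end up classified as stale
			// (the real syncChassis logs a warning for this case).
			continue
		}
		delete(chassisNameMap, chassisID)
	}
	stale := make([]*sbdb.Chassis, 0, len(chassisNameMap))
	for _, chassis := range chassisNameMap {
		stale = append(stale, chassis)
	}
	return stale
}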
-func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNodeNames []string) error { +func (oc *DefaultNetworkController) syncChassis(localZoneNodes, remoteZoneNodes []*corev1.Node) error { chassisList, err := libovsdbops.ListChassis(oc.sbClient) if err != nil { return fmt.Errorf("failed to get chassis list: error: %v", err) @@ -418,10 +429,8 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo } } - chassisHostNameMap := map[string]*sbdb.Chassis{} chassisNameMap := map[string]*sbdb.Chassis{} for _, chassis := range chassisList { - chassisHostNameMap[chassis.Hostname] = chassis chassisNameMap[chassis.Name] = chassis } @@ -443,26 +452,33 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo // Delete existing nodes from the chassis map. // Also delete existing templateVars from the template map. - for _, nodeName := range localZoneNodeNames { - if chassis, ok := chassisHostNameMap[nodeName]; ok { - delete(chassisNameMap, chassis.Name) - delete(chassisHostNameMap, chassis.Hostname) - delete(templateChassisMap, chassis.Name) + for _, node := range localZoneNodes { + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + klog.Warningf("Unable to parse local node %s chassis-id annotation. Chassis may be removed during sync", + node.Name) + continue } + delete(chassisNameMap, chassisID) + delete(templateChassisMap, chassisID) } // Delete existing remote zone nodes from the chassis map, but not from the templateVars // as we need to cleanup chassisTemplateVars for the remote zone nodes - for _, nodeName := range remoteZoneNodeNames { - if chassis, ok := chassisHostNameMap[nodeName]; ok { - delete(chassisNameMap, chassis.Name) - delete(chassisHostNameMap, chassis.Hostname) + for _, node := range remoteZoneNodes { + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + klog.Warningf("Unable to parse remote node %s chassis-id annotation. 
Chassis may be removed during sync", + node.Name) + continue } + delete(chassisNameMap, chassisID) } - staleChassis := make([]*sbdb.Chassis, 0, len(chassisHostNameMap)) - for _, chassis := range chassisNameMap { + staleChassis := make([]*sbdb.Chassis, 0, len(chassisNameMap)) + for name, chassis := range chassisNameMap { staleChassis = append(staleChassis, chassis) + klog.Infof("Removing stale chassis with ID/Name: %s, hostname: %s", name, chassis.Hostname) } staleChassisTemplateVars := make([]*nbdb.ChassisTemplateVar, 0, len(templateChassisMap)) @@ -471,11 +487,11 @@ func (oc *DefaultNetworkController) syncChassis(localZoneNodeNames, remoteZoneNo } if err := libovsdbops.DeleteChassis(oc.sbClient, staleChassis...); err != nil { - return fmt.Errorf("failed Deleting chassis %v error: %v", chassisHostNameMap, err) + return fmt.Errorf("failed Deleting chassis %#v error: %v", chassisNameMap, err) } if err := libovsdbops.DeleteChassisTemplateVar(oc.nbClient, staleChassisTemplateVars...); err != nil { - return fmt.Errorf("failed Deleting chassis template vars %v error: %v", chassisHostNameMap, err) + return fmt.Errorf("failed Deleting chassis template vars %#v error: %v", staleChassisTemplateVars, err) } return nil @@ -638,21 +654,33 @@ func (oc *DefaultNetworkController) addUpdateLocalNodeEvent(node *corev1.Node, n } if nSyncs.syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { - // Call zone chassis handler's AddLocalZoneNode function to mark + // Always call zone chassis handler's AddLocalZoneNode function to mark // this node's chassis record in Southbound db as a local zone chassis. - // This is required when a node moves from a remote zone to local zone + // This is required even when the default network uses no-overlay transport, + // because user-defined networks may still use overlay transport and require + // the chassis entries for their transit switch connectivity. + chassisFailed := false if err := oc.zoneChassisHandler.AddLocalZoneNode(node); err != nil { errs = append(errs, err) oc.syncZoneICFailed.Store(node.Name, true) - } else { + chassisFailed = true + } + + // For no-overlay transport, the default network's interconnect resources are not needed. + // The transit switch and its resources are cleaned up during sync, so we only need + // to create IC resources for overlay transport. + if oc.Transport() != types.NetworkTransportNoOverlay { // Call zone IC handler's AddLocalZoneNode function to create // interconnect resources in the OVN Northbound db for this local zone node. if err := oc.zoneICHandler.AddLocalZoneNode(node); err != nil { errs = append(errs, err) oc.syncZoneICFailed.Store(node.Name, true) - } else { + } else if !chassisFailed { oc.syncZoneICFailed.Delete(node.Name) } + } else if !chassisFailed { + // In no-overlay mode, if chassis handler succeeded, clear the failed state + oc.syncZoneICFailed.Delete(node.Name) } } @@ -680,25 +708,34 @@ func (oc *DefaultNetworkController) addUpdateRemoteNodeEvent(node *corev1.Node, var err error if syncZoneIC && config.OVNKubernetesFeature.EnableInterconnect { - // Call zone chassis handler's AddRemoteZoneNode function to creates - // the remote chassis for the remote zone node in the SB DB or mark - // the entry as remote if it was local chassis earlier + // Always create remote chassis entry with geneve encapsulation. 
+ // This is needed even when the default network uses no-overlay transport, + // because user-defined networks may still use overlay transport and require + // the remote chassis entries for their transit switch connectivity. if err = oc.zoneChassisHandler.AddRemoteZoneNode(node); err != nil { err = fmt.Errorf("adding or updating remote node chassis %s failed, err - %w", node.Name, err) oc.syncZoneICFailed.Store(node.Name, true) return err } - // Call zone IC handler's AddRemoteZoneNode function to create - // interconnect resources in the OVN NBDB for this remote zone node. - // Also, create the remote port binding in SBDB - if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { - err = fmt.Errorf("adding or updating remote node IC resources %s failed, err - %w", node.Name, err) - oc.syncZoneICFailed.Store(node.Name, true) + // For no-overlay transport, the default network's interconnect resources are not needed. + // The transit switch and its resources are cleaned up during sync, so we only need + // to create IC resources for overlay transport. + if oc.Transport() != types.NetworkTransportNoOverlay { + // Call zone IC handler's AddRemoteZoneNode function to create + // interconnect resources in the OVN NBDB for this remote zone node. + // Also, create the remote port binding in SBDB + if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { + err = fmt.Errorf("adding or updating remote node IC resources %s failed, err - %w", node.Name, err) + oc.syncZoneICFailed.Store(node.Name, true) + } else { + oc.syncZoneICFailed.Delete(node.Name) + } + klog.V(5).Infof("Creating Interconnect resources for remote node %q on network %q took: %s", node.Name, oc.GetNetworkName(), time.Since(start)) } else { + // In no-overlay mode, if chassis handler succeeded, clear the failed state oc.syncZoneICFailed.Delete(node.Name) } - klog.V(5).Infof("Creating Interconnect resources for remote node %q on network %q took: %s", node.Name, oc.GetNetworkName(), time.Since(start)) } return err } diff --git a/go-controller/pkg/ovn/master_test.go b/go-controller/pkg/ovn/master_test.go index 8de1687ee3..f040f1a8bb 100644 --- a/go-controller/pkg/ovn/master_test.go +++ b/go-controller/pkg/ovn/master_test.go @@ -1731,7 +1731,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { Name: "newNode", Annotations: map[string]string{ "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"fd02:0:0:2::2895/64\"]}", newNodeSubnet), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("newNode"), util.OvnNodeID: "2", }, }, @@ -1793,7 +1793,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { Name: "newNode", Annotations: map[string]string{ "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", newNodeIpv4Subnet), - "k8s.ovn.org/node-chassis-id": "2", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("newNode"), "k8s.ovn.org/node-gateway-router-lrp-ifaddr": "{\"ipv4\":\"100.64.0.2/16\"}", }, }, @@ -1906,7 +1906,7 @@ var _ = ginkgo.Describe("Default network controller operations", func() { newNodeSubnet := "10.1.2.0/24" transitSwitchSubnet := "100.88.0.3/16" testNode.Annotations["k8s.ovn.org/node-subnets"] = fmt.Sprintf("{\"default\":[\"%s\"]}", newNodeSubnet) - testNode.Annotations["k8s.ovn.org/node-chassis-id"] = "2" + testNode.Annotations["k8s.ovn.org/node-chassis-id"] = chassisIDForNode(testNode.Name) testNode.Annotations["k8s.ovn.org/node-transit-switch-port-ifaddr"] = fmt.Sprintf("{\"ipv4\":\"%s\"}", 
transitSwitchSubnet) testNode.Annotations["k8s.ovn.org/zone-name"] = "foo" updatedNode, err := fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &testNode, metav1.CreateOptions{}) @@ -2135,15 +2135,15 @@ func TestController_syncNodes(t *testing.T) { { name: "removes stale chassis and chassis private", initialSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1", Hostname: node1Name}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, - &sbdb.Chassis{Name: "chassis-node2", Hostname: nodeRmName}, - &sbdb.ChassisPrivate{Name: "chassis-node2"}, - &sbdb.ChassisPrivate{Name: "chassis-node3"}, + &sbdb.Chassis{Name: chassisIDForNode(node1Name), Hostname: node1Name}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(node1Name)}, + &sbdb.Chassis{Name: chassisIDForNode(nodeRmName), Hostname: nodeRmName}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(nodeRmName)}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node3")}, }, expectedSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1", Hostname: node1Name}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, + &sbdb.Chassis{Name: chassisIDForNode(node1Name), Hostname: node1Name}, + &sbdb.ChassisPrivate{Name: chassisIDForNode(node1Name)}, }, }, } @@ -2159,6 +2159,9 @@ func TestController_syncNodes(t *testing.T) { testNode := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node1", + Annotations: map[string]string{ + "k8s.ovn.org/node-chassis-id": chassisIDForNode(node1Name), + }, }, } @@ -2243,20 +2246,20 @@ func TestController_deleteStaleNodeChassis(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - "k8s.ovn.org/node-chassis-id": "chassis-node1-dpu", + "k8s.ovn.org/node-chassis-id": chassisIDForNode("node1-dpu"), }, }, }, name: "removes stale chassis when ovn running on DPU", initialSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1-dpu", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1-dpu"}, - &sbdb.Chassis{Name: "chassis-node1", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1"}, + &sbdb.Chassis{Name: chassisIDForNode("node1-dpu"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1-dpu")}, + &sbdb.Chassis{Name: chassisIDForNode("node1"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1")}, }, expectedSBDB: []libovsdbtest.TestData{ - &sbdb.Chassis{Name: "chassis-node1-dpu", Hostname: "node1"}, - &sbdb.ChassisPrivate{Name: "chassis-node1-dpu"}, + &sbdb.Chassis{Name: chassisIDForNode("node1-dpu"), Hostname: "node1"}, + &sbdb.ChassisPrivate{Name: chassisIDForNode("node1-dpu")}, }, }, } diff --git a/go-controller/pkg/ovn/multicast_test.go b/go-controller/pkg/ovn/multicast_test.go index 0c80906feb..b951fc0957 100644 --- a/go-controller/pkg/ovn/multicast_test.go +++ b/go-controller/pkg/ovn/multicast_test.go @@ -264,7 +264,7 @@ func newNodeWithNad(nad *nadapi.NetworkAttachmentDefinition, networkName, networ n.Annotations["k8s.ovn.org/node-subnets"] = fmt.Sprintf("{\"default\":\"192.168.126.202/24\", \"%s\":\"192.168.127.202/24\"}", networkName) n.Annotations["k8s.ovn.org/network-ids"] = fmt.Sprintf("{\"default\":\"0\",\"%s\":\"%s\"}", networkName, networkID) n.Annotations["k8s.ovn.org/node-mgmt-port-mac-addresses"] = fmt.Sprintf("{\"default\":\"96:8f:e8:25:a2:e5\",\"%s\":\"d6:bc:85:32:30:fb\"}", networkName) - n.Annotations["k8s.ovn.org/node-chassis-id"] = "abdcef" + n.Annotations["k8s.ovn.org/node-chassis-id"] = chassisIDForNode(n.Name) n.Annotations["k8s.ovn.org/l3-gateway-config"] 
= "{\"default\":{\"mac-address\":\"52:54:00:e2:ed:d0\",\"ip-addresses\":[\"10.1.1.10/24\"],\"ip-address\":\"10.1.1.10/24\",\"next-hops\":[\"10.1.1.1\"],\"next-hop\":\"10.1.1.1\"}}" n.Annotations[util.OvnNodeID] = "4" } diff --git a/go-controller/pkg/ovn/multihoming_test.go b/go-controller/pkg/ovn/multihoming_test.go index e41593dd77..3743df4e03 100644 --- a/go-controller/pkg/ovn/multihoming_test.go +++ b/go-controller/pkg/ovn/multihoming_test.go @@ -118,11 +118,11 @@ func withClusterPortGroup() option { } } -func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPorts(isPrimary bool) []libovsdbtest.TestData { - return em.expectedLogicalSwitchesAndPortsWithLspEnabled(isPrimary, nil) +func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPorts() []libovsdbtest.TestData { + return em.expectedLogicalSwitchesAndPortsWithLspEnabled(nil) } -func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsWithLspEnabled(isPrimary bool, expectedPodLspEnabled map[string]*bool) []libovsdbtest.TestData { +func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsWithLspEnabled(expectedPodLspEnabled map[string]*bool) []libovsdbtest.TestData { data := []libovsdbtest.TestData{} for _, ocInfo := range em.fakeOvn.userDefinedNetworkControllers { nodeslsps := make(map[string][]string) @@ -260,10 +260,8 @@ func (em *userDefinedNetworkExpectationMachine) expectedLogicalSwitchesAndPortsW UUID: switchName + "-UUID", Name: switchName, Ports: nodeslsps[switchName], - ExternalIDs: map[string]string{ - ovntypes.NetworkExternalID: ocInfo.bnc.GetNetworkName(), - ovntypes.NetworkRoleExternalID: util.GetUserDefinedNetworkRole(isPrimary), - }, + + ExternalIDs: util.GenerateExternalIDsForSwitchOrRouter(ocInfo.bnc), OtherConfig: otherConfig, ACLs: acls[switchName], } @@ -331,7 +329,7 @@ func newExpectedSwitchPort(lspUUID string, portName string, podAddr string, pod ovntypes.TopologyExternalID: netInfo.TopologyType(), }, Options: map[string]string{ - libovsdbops.RequestedChassis: pod.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(pod), "iface-id-ver": pod.podName, }, PortSecurity: []string{podAddr}, diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 0f41bc4e95..0d6ea4b2d3 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -152,7 +152,7 @@ func getExpectedDataPodsAndSwitchesForUserDefinedNetwork(fakeOvn *FakeOVN, pods ovntypes.TopologyExternalID: ocInfo.bnc.TopologyType(), }, Options: map[string]string{ - libovsdbops.RequestedChassis: pod.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(pod), "iface-id-ver": pod.podName, }, @@ -571,7 +571,6 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { if remote { config.OVNKubernetesFeature.EnableInterconnect = true node.Annotations["k8s.ovn.org/zone-name"] = "remote" - node.Annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" node.Annotations, err = util.UpdateNetworkIDAnnotation(node.Annotations, ovntypes.DefaultNetworkName, 0) gomega.Expect(err).NotTo(gomega.HaveOccurred()) if topology != ovntypes.LocalnetTopology { diff --git a/go-controller/pkg/ovn/network_segmentation_test.go b/go-controller/pkg/ovn/network_segmentation_test.go index cfcc0f7e83..97e48ec1b9 100644 --- a/go-controller/pkg/ovn/network_segmentation_test.go +++ b/go-controller/pkg/ovn/network_segmentation_test.go @@ -85,7 +85,7 @@ var _ = ginkgo.Describe("OVN Pod Operations 
with network segmentation", func() { }, Options: map[string]string{ // check requested-chassis will be updated to correct t1.nodeName value - libovsdbops.RequestedChassis: t1.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(t1), // check old value for iface-id-ver will be updated to pod.UID "iface-id-ver": "wrong_value", }, diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index c137141129..52bbbf5956 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -39,6 +39,7 @@ import ( egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" udnclientfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" libovsdbutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/util" @@ -179,6 +180,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { IPAMClaimsClient: fakeipamclaimclient.NewSimpleClientset(ipamClaimObjects...), NetworkAttchDefClient: nadClient, UserDefinedNetworkClient: udnclientfake.NewSimpleClientset(), + VTEPClient: vtepfake.NewSimpleClientset(), } o.init(nads) } diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index c5ed638bc4..437e15bee3 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/google/uuid" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" "github.com/urfave/cli/v2" @@ -142,6 +143,7 @@ func newNode(nodeName, nodeIPv4CIDR string) *corev1.Node { "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4CIDR, ""), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4CIDR), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, Labels: map[string]string{ @@ -167,6 +169,7 @@ func newNodeGlobalZoneNotEgressableV4Only(nodeName, nodeIPv4 string) *corev1.Nod "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, ""), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, }, @@ -189,6 +192,7 @@ func newNodeGlobalZoneNotEgressableV6Only(nodeName, nodeIPv6 string) *corev1.Nod "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", nodeIPv6), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6Node1Subnet), util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv6), + util.OvnNodeChassisID: chassisIDForNode(nodeName), "k8s.ovn.org/zone-name": "global", }, }, @@ -210,19 +214,20 @@ func newNodeGlobalZoneNotEgressableV6Only(nodeName, nodeIPv6 string) *corev1.Nod } type testPod struct { - portUUID string - nodeName string - nodeSubnet string - nodeMgtIP string - nodeGWIP string - podName string - podIP string - podMAC string - namespace string - portName string - routes []util.PodRoute - noIfaceIdVer bool - networkRole 
string + portUUID string + nodeName string + nodeChassisID string + nodeSubnet string + nodeMgtIP string + nodeGWIP string + podName string + podIP string + podMAC string + namespace string + portName string + routes []util.PodRoute + noIfaceIdVer bool + networkRole string udnPodInfos map[string]*udnPodInfo } @@ -245,21 +250,40 @@ type portInfo struct { prefixLen int } +func chassisIDForNode(nodeName string) string { + return uuid.NewSHA1(uuid.NameSpaceOID, []byte(nodeName)).String() +} + +func requestedChassisForPod(pod testPod) string { + if pod.nodeChassisID != "" { + return pod.nodeChassisID + } + if pod.nodeName == "" { + return "" + } + return chassisIDForNode(pod.nodeName) +} + func newTPod(nodeName, nodeSubnet, nodeMgtIP, nodeGWIP, podName, podIPs, podMAC, namespace string) testPod { portName := util.GetLogicalPortName(namespace, podName) + nodeChassisID := "" + if nodeName != "" { + nodeChassisID = chassisIDForNode(nodeName) + } to := testPod{ - portUUID: portName + "-UUID", - nodeSubnet: nodeSubnet, - nodeMgtIP: nodeMgtIP, - nodeGWIP: nodeGWIP, - podIP: podIPs, - podMAC: podMAC, - portName: portName, - nodeName: nodeName, - podName: podName, - namespace: namespace, - udnPodInfos: map[string]*udnPodInfo{}, - networkRole: ovntypes.NetworkRolePrimary, // all tests here run with network-segmentation disabled by default by default + portUUID: portName + "-UUID", + nodeSubnet: nodeSubnet, + nodeMgtIP: nodeMgtIP, + nodeGWIP: nodeGWIP, + podIP: podIPs, + podMAC: podMAC, + portName: portName, + nodeName: nodeName, + nodeChassisID: nodeChassisID, + podName: podName, + namespace: namespace, + udnPodInfos: map[string]*udnPodInfo{}, + networkRole: ovntypes.NetworkRolePrimary, // all tests here run with network-segmentation disabled by default by default } var routeSources []*net.IPNet @@ -479,7 +503,7 @@ func getExpectedDataPodsSwitchesPortGroup(netInfo util.NetInfo, pods []testPod, "namespace": pod.namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: pod.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(pod), "iface-id-ver": pod.podName, }, PortSecurity: []string{podAddr}, @@ -2030,7 +2054,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, Options: map[string]string{ // check requested-chassis will be updated to correct t1.nodeName value - libovsdbops.RequestedChassis: t2.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(t2), // check old value for iface-id-ver will be updated to pod.UID "iface-id-ver": "wrong_value", }, @@ -2045,7 +2069,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { "namespace": t2.namespace, }, Options: map[string]string{ - libovsdbops.RequestedChassis: t2.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(t2), //"iface-id-ver": is empty to check that it won't be set on update }, PortSecurity: []string{fmt.Sprintf("%s %s", t2.podMAC, t2.podIP)}, @@ -2060,7 +2084,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, Options: map[string]string{ // check requested-chassis will be updated to correct t1.nodeName value - libovsdbops.RequestedChassis: t3.nodeName, + libovsdbops.RequestedChassis: requestedChassisForPod(t3), // check old value for iface-id-ver will be updated to pod.UID "iface-id-ver": "wrong_value", }, @@ -2230,7 +2254,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, Options: map[string]string{ // check requested-chassis will be updated to correct t1.nodeName value - libovsdbops.RequestedChassis: t1.nodeName, + libovsdbops.RequestedChassis: 
requestedChassisForPod(t1), // check old value for iface-id-ver will be updated to pod.UID "iface-id-ver": "wrong_value", }, diff --git a/go-controller/pkg/ovn/routeimport/route_import.go b/go-controller/pkg/ovn/routeimport/route_import.go index 18c372c276..e99c948edd 100644 --- a/go-controller/pkg/ovn/routeimport/route_import.go +++ b/go-controller/pkg/ovn/routeimport/route_import.go @@ -343,11 +343,13 @@ func (c *controller) syncNetwork(network string) error { c.setTableForNetworkUnlocked(info.GetNetworkID(), table) c.Unlock() - // skip routes in the pod network - // TODO do not skip these routes in no overlay mode - ignoreSubnets := make([]*net.IPNet, len(info.Subnets())) - for i, subnet := range info.Subnets() { - ignoreSubnets[i] = subnet.CIDR + var ignoreSubnets []*net.IPNet + if info.Transport() != types.NetworkTransportNoOverlay { + // if the network is overlay mode, skip routes to the pod network + ignoreSubnets = make([]*net.IPNet, len(info.Subnets())) + for i, subnet := range info.Subnets() { + ignoreSubnets[i] = subnet.CIDR + } } expected, err := c.getBGPRoutes(table, ignoreSubnets) @@ -431,6 +433,7 @@ func (c *controller) getBGPRoutes(table int, ignoreSubnets []*net.IPNet) (sets.S routes := sets.New[route]() for _, nlroute := range nlroutes { if util.IsContainedInAnyCIDR(nlroute.Dst, ignoreSubnets...) { + c.log.V(5).Info("Ignore BGP route", "table", table, "route", stringer{nlroute}) continue } routes.Insert(routesFromNetlinkRoute(&nlroute)...) diff --git a/go-controller/pkg/ovn/routeimport/route_import_test.go b/go-controller/pkg/ovn/routeimport/route_import_test.go index cf71a392a8..b79f93101d 100644 --- a/go-controller/pkg/ovn/routeimport/route_import_test.go +++ b/go-controller/pkg/ovn/routeimport/route_import_test.go @@ -2,6 +2,7 @@ package routeimport import ( "errors" + "net" "sync" "testing" @@ -13,6 +14,7 @@ import ( "k8s.io/client-go/util/workqueue" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" controllerutil "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" ovntesting "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -26,16 +28,33 @@ import ( func Test_controller_syncNetwork(t *testing.T) { node := "testnode" + // Capture original global config values and restore after test + origClusterSubnets := config.Default.ClusterSubnets + t.Cleanup(func() { + config.Default.ClusterSubnets = origClusterSubnets + }) + defaultNetwork := &util.DefaultNetInfo{} defaultNetworkRouter := defaultNetwork.GetNetworkScopedGWRouterName(node) defaultNetworkRouterPort := types.GWRouterToExtSwitchPrefix + defaultNetworkRouter + config.Default.ClusterSubnets = []config.CIDRNetworkEntry{ + { + CIDR: &net.IPNet{ + IP: net.IPv4(10, 128, 0, 0), + Mask: net.CIDRMask(16, 32), + }, + HostSubnetLength: 24, + }, + } + udn := &multinetworkmocks.NetInfo{} udn.On("IsDefault").Return(false) udn.On("GetNetworkName").Return("udn") udn.On("GetNetworkID").Return(1) udn.On("Subnets").Return(nil) udn.On("GetNetworkScopedGWRouterName", node).Return("router") + udn.On("Transport").Return("") cudn := &multinetworkmocks.NetInfo{} cudn.On("IsDefault").Return(false) @@ -43,6 +62,7 @@ func Test_controller_syncNetwork(t *testing.T) { cudn.On("GetNetworkID").Return(2) cudn.On("Subnets").Return(nil) cudn.On("GetNetworkScopedGWRouterName", node).Return("router") + cudn.On("Transport").Return("") type fields struct { networkIDs map[int]string @@ -52,16 +72,17 @@ func Test_controller_syncNetwork(t *testing.T) { network 
string } tests := []struct { - name string - fields fields - args args - initial []libovsdb.TestData - expected []libovsdb.TestData - routes []netlink.Route - link netlink.Link - linkErr bool - routesErr bool - wantErr bool + name string + fields fields + args args + initial []libovsdb.TestData + expected []libovsdb.TestData + routes []netlink.Route + link netlink.Link + noOverlayEnabled bool + linkErr bool + routesErr bool + wantErr bool }{ { name: "ignored if network not known", @@ -168,11 +189,61 @@ func Test_controller_syncNetwork(t *testing.T) { &nbdb.LogicalRouterStaticRoute{UUID: "untouched-1", IPPrefix: "3.3.3.0/24", Nexthop: "3.3.3.2", ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, }, }, + { + name: "ignores host subnet routes as necessary in overlay mode", + args: args{"default"}, + fields: fields{ + networkIDs: map[int]string{0: "default"}, + networks: map[string]util.NetInfo{"default": defaultNetwork}, + }, + link: &netlink.Vrf{Table: unix.RT_TABLE_MAIN}, + initial: []libovsdb.TestData{ + &nbdb.LogicalRouter{Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + routes: []netlink.Route{ + {Dst: ovntesting.MustParseIPNet("1.1.1.0/24"), Gw: ovntesting.MustParseIP("1.1.1.1")}, + {Dst: ovntesting.MustParseIPNet("10.128.1.0/24"), Gw: ovntesting.MustParseIP("2.2.2.1")}, + }, + expected: []libovsdb.TestData{ + &nbdb.LogicalRouter{UUID: "router", Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + }, + { + name: "adds host subnet routes as necessary in no-overlay mode", + noOverlayEnabled: true, + args: args{"default"}, + fields: fields{ + networkIDs: map[int]string{0: "default"}, + networks: map[string]util.NetInfo{"default": defaultNetwork}, + }, + link: &netlink.Vrf{Table: unix.RT_TABLE_MAIN}, + initial: []libovsdb.TestData{ + &nbdb.LogicalRouter{Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + routes: []netlink.Route{ + {Dst: ovntesting.MustParseIPNet("1.1.1.0/24"), Gw: ovntesting.MustParseIP("1.1.1.1")}, + {Dst: ovntesting.MustParseIPNet("10.128.1.0/24"), Gw: ovntesting.MustParseIP("2.2.2.1")}, + }, + expected: []libovsdb.TestData{ + &nbdb.LogicalRouter{UUID: "router", Name: defaultNetwork.GetNetworkScopedGWRouterName(node), StaticRoutes: []string{"keep-1", "add-1"}}, + &nbdb.LogicalRouterStaticRoute{UUID: "keep-1", IPPrefix: "1.1.1.0/24", Nexthop: "1.1.1.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + &nbdb.LogicalRouterStaticRoute{UUID: "add-1", IPPrefix: "10.128.1.0/24", Nexthop: "2.2.2.1", OutputPort: &defaultNetworkRouterPort, ExternalIDs: map[string]string{controllerExternalIDKey: controllerName}}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := gomega.NewWithT(t) + // Capture and restore global config 
value for this subtest + origTransport := config.Default.Transport + t.Cleanup(func() { + config.Default.Transport = origTransport + }) + testError := errors.New("test forced error or incorrect test arguments") network := tt.fields.networks[tt.args.network] @@ -211,6 +282,10 @@ func Test_controller_syncNetwork(t *testing.T) { netlink: nlmock, } + if tt.noOverlayEnabled { + config.Default.Transport = types.NetworkTransportNoOverlay + } + err = c.syncNetwork(tt.args.network) if tt.wantErr { g.Expect(err).To(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/topology/topologyfactory.go b/go-controller/pkg/ovn/topology/topologyfactory.go index ead14e05b2..080b306b23 100644 --- a/go-controller/pkg/ovn/topology/topologyfactory.go +++ b/go-controller/pkg/ovn/topology/topologyfactory.go @@ -36,7 +36,7 @@ func (gtf *GatewayTopologyFactory) NewClusterRouterWithMulticastSupport( netInfo util.NetInfo, coopUUID string, ) (*nbdb.LogicalRouter, error) { - routerOptions := map[string]string{"mcast_relay": "true"} + routerOptions := map[string]string{"mcast_relay": "true", "always_learn_from_arp_request": "false"} return gtf.newClusterRouter(clusterRouterName, netInfo, coopUUID, routerOptions) } diff --git a/go-controller/pkg/ovn/topology/topologyfactory_test.go b/go-controller/pkg/ovn/topology/topologyfactory_test.go index 4d189e030a..dbed43ffb5 100644 --- a/go-controller/pkg/ovn/topology/topologyfactory_test.go +++ b/go-controller/pkg/ovn/topology/topologyfactory_test.go @@ -88,7 +88,7 @@ var _ = Describe("Topology factory", func() { ovntypes.TopologyExternalID: ovntypes.Layer3Topology, "k8s-cluster-router": "yes", } - expectedOptions := map[string]string{"mcast_relay": "true"} + expectedOptions := map[string]string{"mcast_relay": "true", "always_learn_from_arp_request": "false"} Expect(clusterRouter).To( WithTransform( removeUUID, diff --git a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go index b838221892..13a370ef90 100644 --- a/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/chassis_handler.go @@ -160,12 +160,18 @@ func (zch *ZoneChassisHandler) createOrUpdateNodeChassis(node *corev1.Node, isRe } chassis := sbdb.Chassis{ - Name: chassisID, - Hostname: node.Name, + Name: chassisID, OtherConfig: map[string]string{ "is-remote": strconv.FormatBool(isRemote), }, } + if isRemote { + // For debugging purposes we add KAPI node name as the chassis hostname. + // It is not used for anything other than a helpful hint for debugging. + // There is no need to set it for the local node, as ovn-controller will + // set it automatically from the OVS external_id:hostname field. + chassis.Hostname = node.Name + } return libovsdbops.CreateOrUpdateChassis(zch.sbClient, &chassis, encaps...) 
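A minimal, self-contained sketch of the conditional-hostname pattern the chassis_handler.go hunk above introduces — chassisRecord and buildChassis are hypothetical stand-ins for the real sbdb.Chassis type and libovsdbops helpers, not the actual API:

package main

import (
	"fmt"
	"strconv"
)

// chassisRecord is a simplified stand-in for sbdb.Chassis.
type chassisRecord struct {
	Name        string
	Hostname    string
	OtherConfig map[string]string
}

// buildChassis mirrors the conditional above: the hostname is only set for
// remote chassis, purely as a debugging hint; for local chassis
// ovn-controller fills it in itself from the OVS external_ids hostname field.
func buildChassis(chassisID, nodeName string, isRemote bool) chassisRecord {
	c := chassisRecord{
		Name:        chassisID,
		OtherConfig: map[string]string{"is-remote": strconv.FormatBool(isRemote)},
	}
	if isRemote {
		c.Hostname = nodeName
	}
	return c
}

func main() {
	fmt.Printf("%+v\n", buildChassis("uuid-1", "node1", true))  // Hostname set
	fmt.Printf("%+v\n", buildChassis("uuid-2", "node2", false)) // Hostname left empty
}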
} diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go index 23a310c9ab..b269fd2052 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "strconv" + "strings" "time" corev1 "k8s.io/api/core/v1" @@ -173,12 +174,20 @@ func (zic *ZoneInterconnectHandler) createOrUpdateTransitSwitch(networkID int) e // ensureTransitSwitch sets up the global transit switch required for interoperability with other zones // Must wait for network id to be annotated to any node by cluster manager -func (zic *ZoneInterconnectHandler) ensureTransitSwitch(nodes []*corev1.Node) error { - if len(nodes) == 0 { // nothing to do - return nil - } +func (zic *ZoneInterconnectHandler) ensureTransitSwitch() error { start := time.Now() + // Get the transit switch; any error other than ErrNotFound is fatal, while ErrNotFound just means it needs to be created below + ts := &nbdb.LogicalSwitch{ + Name: zic.networkTransitSwitchName, + } + + _, err := libovsdbops.GetLogicalSwitch(zic.nbClient, ts) + if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) { + return err + } + + // Create the transit switch if it doesn't exist if err := zic.createOrUpdateTransitSwitch(zic.GetNetworkID()); err != nil { return err } @@ -198,6 +207,10 @@ func (zic *ZoneInterconnectHandler) AddLocalZoneNode(node *corev1.Node) error { return fmt.Errorf("failed to get node id for node - %s", node.Name) } + if err := zic.ensureTransitSwitch(); err != nil { + return fmt.Errorf("ensuring transit switch for local zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + if err := zic.createLocalZoneNodeResources(node, nodeID); err != nil { return fmt.Errorf("creating interconnect resources for local zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) } @@ -257,6 +270,10 @@ func (zic *ZoneInterconnectHandler) AddRemoteZoneNode(node *corev1.Node) error { } } + if err := zic.ensureTransitSwitch(); err != nil { + return fmt.Errorf("ensuring transit switch for remote zone node %s for the network %s failed : err - %w", node.Name, zic.GetNetworkName(), err) + } + klog.Infof("Creating interconnect resources for remote zone node %s for the network %s", node.Name, zic.GetNetworkName()) if err := zic.createRemoteZoneNodeResources(node, nodeID, nodeTransitSwitchPortIPs, nodeSubnets, nodeGRPIPs); err != nil { @@ -273,58 +290,94 @@ func (zic *ZoneInterconnectHandler) DeleteNode(node *corev1.Node) error { return zic.cleanupNode(node.Name) } -// SyncNodes ensures a transit switch exists and cleans up the interconnect -// resources present in the OVN Northbound db for the stale nodes -func (zic *ZoneInterconnectHandler) SyncNodes(objs []interface{}) error { +// CleanupStaleNodes cleans up the interconnect resources for stale nodes. +func (zic *ZoneInterconnectHandler) CleanupStaleNodes(objs []interface{}) error { + // Build set of current node names foundNodeNames := sets.New[string]() - foundNodes := make([]*corev1.Node, len(objs)) - for i, obj := range objs { + for _, obj := range objs { node, ok := obj.(*corev1.Node) if !ok { - return fmt.Errorf("spurious object in CleanupStaleNodes: %v", obj) + } foundNodeNames.Insert(node.Name) - foundNodes[i] = node } + staleNodeNames := sets.New[string]() - // Get the transit switch. 
If its not present no cleanup to do + // Get the transit switch ts := &nbdb.LogicalSwitch{ Name: zic.networkTransitSwitchName, } - ts, err := libovsdbops.GetLogicalSwitch(zic.nbClient, ts) - if err != nil { - if errors.Is(err, libovsdbclient.ErrNotFound) { - // This can happen for the first time when interconnect is enabled. - // Let's ensure the transit switch exists - return zic.ensureTransitSwitch(foundNodes) - } + if err == nil { + // Transit switch exists - find stale nodes by checking transit switch ports + for _, p := range ts.Ports { + lp := &nbdb.LogicalSwitchPort{ + UUID: p, + } - return err - } + lp, err := libovsdbops.GetLogicalSwitchPort(zic.nbClient, lp) + if err != nil { + continue + } + + if lp.ExternalIDs == nil { + continue + } - staleNodeNames := []string{} - for _, p := range ts.Ports { - lp := &nbdb.LogicalSwitchPort{ - UUID: p, + lportNode := lp.ExternalIDs["node"] + if lportNode != "" && !foundNodeNames.Has(lportNode) { + staleNodeNames.Insert(lportNode) + } + } + } else if errors.Is(err, libovsdbclient.ErrNotFound) { + // Transit switch doesn't exist - discover nodes from cluster router resources + lr := &nbdb.LogicalRouter{Name: zic.networkClusterRouterName} + lr, err = libovsdbops.GetLogicalRouter(zic.nbClient, lr) + if err != nil { + if !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to get cluster router: %w", err) + } + // Router doesn't exist, nothing to cleanup + return nil } - lp, err = libovsdbops.GetLogicalSwitchPort(zic.nbClient, lp) + // Discover remote zone nodes from static routes with ic-node external ID + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(zic.nbClient, lr, p) if err != nil { - continue + return fmt.Errorf("failed to get static routes for cluster router: %w", err) } - if lp.ExternalIDs == nil { - continue + for _, route := range routes { + nodeName := route.ExternalIDs["ic-node"] + if nodeName != "" && !foundNodeNames.Has(nodeName) { + staleNodeNames.Insert(nodeName) + } } - lportNode := lp.ExternalIDs["node"] - if !foundNodeNames.Has(lportNode) { - staleNodeNames = append(staleNodeNames, lportNode) + // Discover local zone nodes from router ports connecting to transit switch + routerPortPrefix := zic.GetNetworkScopedName(types.RouterToTransitSwitchPrefix) + for _, portUUID := range lr.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(zic.nbClient, &nbdb.LogicalRouterPort{UUID: portUUID}) + if err != nil { + continue + } + // Extract node name from port name (e.g., "rtots-node1" -> "node1") + if nodeName, found := strings.CutPrefix(lrp.Name, routerPortPrefix); found { + if nodeName != "" && !foundNodeNames.Has(nodeName) { + staleNodeNames.Insert(nodeName) + } + } } + } else { + // Unexpected error + return fmt.Errorf("unexpected error while getting transit switch: %w", err) } - for _, staleNodeName := range staleNodeNames { + // Cleanup stale interconnect resources + for _, staleNodeName := range staleNodeNames.UnsortedList() { if err := zic.cleanupNode(staleNodeName); err != nil { klog.Errorf("Failed to cleanup the interconnect resources from OVN Northbound db for the stale node %s: %v", staleNodeName, err) } @@ -333,10 +386,25 @@ func (zic *ZoneInterconnectHandler) SyncNodes(objs []interface{}) error { return nil } -// Cleanup deletes the transit switch for the network +// Cleanup deletes all interconnect resources for the network, including all 
node resources +// (ports, router ports, static routes) and the transit switch itself. This method is idempotent +// and safe to call multiple times. func (zic *ZoneInterconnectHandler) Cleanup() error { + klog.Infof("Cleaning up all interconnect resources for network %s", zic.GetNetworkName()) + + // First cleanup all node resources (ports, routes, etc.) + // Passing nil removes all nodes from the transit switch + if err := zic.CleanupStaleNodes(nil); err != nil { + return fmt.Errorf("failed to cleanup node resources: %w", err) + } + + // Then delete the transit switch klog.Infof("Deleting the transit switch %s for the network %s", zic.networkTransitSwitchName, zic.GetNetworkName()) - return libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName) + if err := libovsdbops.DeleteLogicalSwitch(zic.nbClient, zic.networkTransitSwitchName); err != nil && + !errors.Is(err, libovsdbclient.ErrNotFound) { + return fmt.Errorf("failed to delete transit switch: %w", err) + } + return nil } // AddTransitSwitchConfig is only used by the layer2 network controller @@ -384,7 +452,6 @@ func (zic *ZoneInterconnectHandler) addTransitSwitchConfig(sw *nbdb.LogicalSwitc } // createLocalZoneNodeResources creates the local zone node resources for interconnect -// - creates Transit switch if it doesn't yet exit // - creates a logical switch port of type "router" in the transit switch with the name as - .tstor- // Eg. if the node name is ovn-worker and the network is default, the name would be - tstor-ovn-worker // if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor-ovn-worker @@ -442,7 +509,6 @@ func (zic *ZoneInterconnectHandler) createLocalZoneNodeResources(node *corev1.No } // createRemoteZoneNodeResources creates the remote zone node resources -// - creates Transit switch if it doesn't yet exit // - creates a logical port of type "remote" in the transit switch with the name as - .tstor. // Eg. if the node name is ovn-worker and the network is default, the name would be - tstor.ovn-worker // if the node name is ovn-worker and the network name is blue, the logical port name would be - blue.tstor.ovn-worker @@ -460,9 +526,18 @@ func (zic *ZoneInterconnectHandler) createRemoteZoneNodeResources(node *corev1.N remotePortAddr = remotePortAddr + " " + tsNetwork } + chassisID, err := util.ParseNodeChassisIDAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + // remote node may not have the annotation yet, suppress it + return types.NewSuppressedError(err) + } + return fmt.Errorf("failed to parse node chassis-id for node %s: %w", node.Name, err) + } + lspOptions := map[string]string{ libovsdbops.RequestedTnlKey: strconv.Itoa(nodeID), - libovsdbops.RequestedChassis: node.Name, + libovsdbops.RequestedChassis: chassisID, } // Store the node name in the external_ids column for book keeping externalIDs := map[string]string{ diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go index e138037031..76f872e00b 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler_test.go @@ -35,8 +35,8 @@ const ( // ovnNodeZoneNameAnnotation is the node annotation name to store the node zone name. ovnNodeZoneNameAnnotation = "k8s.ovn.org/zone-name" - // ovnNodeChassisIDAnnotatin is the node annotation name to store the node chassis id. 
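A short sketch of the node-name discovery step used above when the transit switch is gone: node names are recovered from router port names via the router-to-transit-switch prefix. staleNodesFromPortNames and the sample names are illustrative, not the real API:

package main

import (
	"fmt"
	"strings"
)

// staleNodesFromPortNames mirrors the discovery loop above: every router port
// whose name carries the router-to-transit-switch prefix encodes a node name,
// and any such node not in the current set is considered stale.
func staleNodesFromPortNames(portNames []string, prefix string, current map[string]bool) []string {
	var stale []string
	for _, name := range portNames {
		// e.g. "rtots-node1" -> "node1"
		if nodeName, found := strings.CutPrefix(name, prefix); found {
			if nodeName != "" && !current[nodeName] {
				stale = append(stale, nodeName)
			}
		}
	}
	return stale
}

func main() {
	ports := []string{"rtots-node1", "rtots-node2", "rtos-other"}
	current := map[string]bool{"node1": true}
	fmt.Println(staleNodesFromPortNames(ports, "rtots-", current)) // [node2]
}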
- ovnNodeChassisIDAnnotatin = "k8s.ovn.org/node-chassis-id" + // ovnNodeChassisIDAnnotation is the node annotation name to store the node chassis id. + ovnNodeChassisIDAnnotation = "k8s.ovn.org/node-chassis-id" // ovnNodeSubnetsAnnotation is the node annotation name to store the node subnets. ovnNodeSubnetsAnnotation = "k8s.ovn.org/node-subnets" @@ -298,7 +298,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.2.0/24\"]}", @@ -315,7 +315,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.3.0/24\"]}", @@ -332,7 +332,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"default\":[\"10.244.4.0/24\"]}", @@ -562,11 +562,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + // Call ICHandler CleanupStaleNodes function removing the testNode3 from the list of nodes var kNodes []interface{} kNodes = append(kNodes, &testNode1) kNodes = append(kNodes, &testNode2) - err = zoneICHandler.SyncNodes(kNodes) + err = zoneICHandler.CleanupStaleNodes(kNodes) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = checkInterconnectResources("global", types.DefaultNetworkName, libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -583,6 +583,239 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + + ginkgo.It("CleanupStaleNodes with nil should cleanup all transit switch ports for no-overlay migration", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient, 
nil) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + // Create transit switch and add nodes (simulating previous overlay configuration) + err = zoneICHandler.createOrUpdateTransitSwitch(0) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Set up nodes: testNode1 as local zone, testNode2 and testNode3 as remote zones + testNode2.Annotations[ovnNodeZoneNameAnnotation] = "remote-zone-1" + testNode3.Annotations[ovnNodeZoneNameAnnotation] = "remote-zone-2" + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch exists with ports + ts, err := libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(ts.Ports).NotTo(gomega.BeEmpty(), "Transit switch should have ports before cleanup") + + // Verify IC router ports exist (for local zone node) + clusterRouter, err := libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + icRouterPorts := 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Should have router port for local zone node before cleanup") + + // Verify IC static routes exist (for remote zone nodes) + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).NotTo(gomega.BeEmpty(), "Should have IC static routes for remote zone nodes before cleanup") + + // Call CleanupStaleNodes with nil to simulate no-overlay migration + // nil means "no current IC nodes", so all nodes become stale and should be cleaned up + err = zoneICHandler.CleanupStaleNodes(nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify all transit switch ports are cleaned up + ts, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(ts.Ports).To(gomega.BeEmpty(), "Transit switch ports should be cleaned up") + + // Verify all IC router ports are cleaned up (local zone node resources) + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(0), "All IC router ports should be cleaned up") + + // Verify all IC static routes are cleaned up (remote zone node resources) + routes, err = 
libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).To(gomega.BeEmpty(), "All IC static routes should be cleaned up") + + // Now call Cleanup to remove all interconnect resources (transit switch and any remaining nodes) + err = zoneICHandler.Cleanup() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch is deleted + _, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).To(gomega.MatchError(libovsdbclient.ErrNotFound)) + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("CleanupStaleNodes with nil should cleanup orphaned IC resources when transit switch doesn't exist", func() { + app.Action = func(ctx *cli.Context) error { + dbSetup := libovsdbtest.TestSetup{ + NBData: initialNBDB, + SBData: initialSBDB, + } + + _, err := config.InitConfig(ctx, nil, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + config.Kubernetes.HostNetworkNamespace = "" + + var libovsdbOvnNBClient, libovsdbOvnSBClient libovsdbclient.Client + libovsdbOvnNBClient, libovsdbOvnSBClient, libovsdbCleanup, err = libovsdbtest.NewNBSBTestHarness(dbSetup) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = createTransitSwitchPortBindings(libovsdbOvnSBClient, types.DefaultNetworkName, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + zoneICHandler := NewZoneInterconnectHandler(&util.DefaultNetInfo{}, libovsdbOvnNBClient, libovsdbOvnSBClient, nil) + gomega.Expect(zoneICHandler).NotTo(gomega.BeNil()) + + // Create transit switch and add nodes (simulating previous IC configuration) + err = zoneICHandler.createOrUpdateTransitSwitch(0) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Add testNode1 as local zone, testNode2 and testNode3 as remote zone + testNode2.Annotations[ovnNodeZoneNameAnnotation] = "remote" + testNode3.Annotations[ovnNodeZoneNameAnnotation] = "remote" + err = invokeICHandlerAddNodeFunction("global", zoneICHandler, &testNode1, &testNode2, &testNode3) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify IC resources exist + clusterRouter, err := libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Count IC router ports (for local zone nodes) + icRouterPorts := 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Should have router port for local zone node (node1)") + + // Count IC static routes (for remote zone nodes) + p := func(route *nbdb.LogicalRouterStaticRoute) bool { + return route.ExternalIDs != nil && route.ExternalIDs["ic-node"] != "" + } + routes, err := libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + 
gomega.Expect(routes).ToNot(gomega.BeEmpty(), "Should have IC static routes for remote zone nodes (node2, node3)") + + // Manually delete the transit switch to simulate the resource leak scenario + // This leaves orphaned router ports and static routes + err = libovsdbops.DeleteLogicalSwitch(libovsdbOvnNBClient, types.TransitSwitch) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify transit switch is gone + _, err = libovsdbops.GetLogicalSwitch(libovsdbOvnNBClient, &nbdb.LogicalSwitch{Name: types.TransitSwitch}) + gomega.Expect(err).To(gomega.MatchError(libovsdbclient.ErrNotFound)) + + // Verify orphaned resources still exist + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(1), "Router port should still exist before cleanup (the leak)") + + routes, err = libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).ToNot(gomega.BeEmpty(), "IC static routes should still exist before cleanup (the leak)") + + // Call CleanupStaleNodes with nil - should discover all nodes and clean them + err = zoneICHandler.CleanupStaleNodes(nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Verify all router ports are cleaned up + clusterRouter, err = libovsdbops.GetLogicalRouter(libovsdbOvnNBClient, &nbdb.LogicalRouter{Name: types.OVNClusterRouter}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + icRouterPorts = 0 + for _, p := range clusterRouter.Ports { + lrp, err := libovsdbops.GetLogicalRouterPort(libovsdbOvnNBClient, &nbdb.LogicalRouterPort{UUID: p}) + if err != nil { + continue + } + if len(lrp.Name) >= len(types.RouterToTransitSwitchPrefix) && lrp.Name[:len(types.RouterToTransitSwitchPrefix)] == types.RouterToTransitSwitchPrefix { + icRouterPorts++ + } + } + gomega.Expect(icRouterPorts).To(gomega.Equal(0), "All router ports should be cleaned up") + + // Verify all IC static routes are cleaned up + routes, err = libovsdbops.GetRouterLogicalRouterStaticRoutesWithPredicate(libovsdbOvnNBClient, clusterRouter, p) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(routes).To(gomega.BeEmpty(), "All IC static routes should be cleaned up") + + return nil + } + + err := app.Run([]string{ + app.Name, + "-cluster-subnets=" + clusterCIDR, + "-init-cluster-manager", + "-zone-join-switch-subnets=" + joinSubnetCIDR, + "-enable-interconnect", + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) ginkgo.Context("Secondary networks", func() { @@ -591,7 +824,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.2.0/24\"]}", @@ -608,7 +841,7 @@ var _ = ginkgo.Describe("Zone 
Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.3.0/24\"]}", @@ -625,7 +858,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"blue\":[\"10.244.4.0/24\"]}", @@ -718,11 +951,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2, &testNode3) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Call ICHandler SyncNodes function removing the testNode3 from the list of nodes + // Call ICHandler CleanupStaleNodes function removing the testNode3 from the list of nodes var kNodes []interface{} kNodes = append(kNodes, &testNode1) kNodes = append(kNodes, &testNode2) - err = zoneICHandler.SyncNodes(kNodes) + err = zoneICHandler.CleanupStaleNodes(kNodes) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = checkInterconnectResources("global", "blue", libovsdbOvnNBClient, testNodesRouteInfo, &testNode1, &testNode2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -746,7 +979,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node1", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", ovnNodeZoneNameAnnotation: "global", ovnNodeIDAnnotaton: "2", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.2.0/24\"], \"blue\":[\"11.244.2.0/24\"]}", @@ -763,7 +996,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node2", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac7", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "3", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.3.0/24\"], \"blue\":[\"11.244.3.0/24\"]}", @@ -780,7 +1013,7 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node3", Annotations: map[string]string{ - ovnNodeChassisIDAnnotatin: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", + ovnNodeChassisIDAnnotation: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac8", ovnNodeZoneNameAnnotation: "foo", ovnNodeIDAnnotaton: "4", ovnNodeSubnetsAnnotation: "{\"red\":[\"10.244.4.0/24\"], \"blue\":[\"11.244.4.0/24\"]}", @@ -1004,6 +1237,11 @@ var _ = ginkgo.Describe("Zone Interconnect Operations", func() { // Set the node transit switch port ips testNode4.Annotations[ovnTransitSwitchPortAddrAnnotation] = "{\"ipv4\":\"100.88.0.5/16\"}" err = zoneICHandler.AddRemoteZoneNode(&testNode4) + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("k8s.ovn.org/node-chassis-id annotation not found for node node4"))) + + // Set chassis-id annotation + testNode4.Annotations[ovnNodeChassisIDAnnotation] = "c44f341d-2862-4fbe-8b93-10e98b0fa84f" + err = 
zoneICHandler.AddRemoteZoneNode(&testNode4) gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("failed to create static route ops: unable to get logical router static routes with predicate on router ovn_cluster_router"))) // Create the cluster router diff --git a/go-controller/pkg/ovnwebhook/nodeadmission.go b/go-controller/pkg/ovnwebhook/nodeadmission.go index e7dc733371..74cfe79e46 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission.go @@ -59,14 +59,6 @@ var commonNodeAnnotationChecks = map[string]checkNodeAnnot{ // interconnectNodeAnnotationChecks holds annotations allowed for ovnkube-node: users in IC environments var interconnectNodeAnnotationChecks = map[string]checkNodeAnnot{ - util.OvnNodeMigratedZoneName: func(v annotationChange, nodeName string) error { - // it is allowed for the annotation to be set to - if (v.action == added || v.action == changed) && v.value == nodeName { - return nil - } - - return fmt.Errorf("%s can only be set to %s, it cannot be removed", util.OvnNodeMigratedZoneName, nodeName) - }, util.Layer2TopologyVersion: func(v annotationChange, _ string) error { // it is allowed for the annotation to be added or removed if v.action == added || v.action == removed { diff --git a/go-controller/pkg/ovnwebhook/nodeadmission_test.go b/go-controller/pkg/ovnwebhook/nodeadmission_test.go index 53975940b3..bb3a946533 100644 --- a/go-controller/pkg/ovnwebhook/nodeadmission_test.go +++ b/go-controller/pkg/ovnwebhook/nodeadmission_test.go @@ -403,47 +403,6 @@ func TestNodeAdmission_ValidateUpdate(t *testing.T) { }) } } -func TestNodeAdmission_ValidateUpdateIC(t *testing.T) { - adm := NewNodeAdmissionWebhook(true, false) - tests := []struct { - name string - ctx context.Context - oldObj runtime.Object - newObj runtime.Object - expectedErr error - }{ - { - name: "ovnkube-node cannot set util.OvnNodeMigratedZoneName to anything else than ", - ctx: admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: userName, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: "global"}, - }, - }, - expectedErr: fmt.Errorf("user: %q is not allowed to set %s on node %q: %s can only be set to %s, it cannot be removed", userName, util.OvnNodeMigratedZoneName, nodeName, util.OvnNodeMigratedZoneName, nodeName), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := adm.ValidateUpdate(tt.ctx, tt.oldObj, tt.newObj) - if err != tt.expectedErr && err.Error() != tt.expectedErr.Error() { - t.Errorf("ValidateUpdateIC() error = %v, wantErr %v", err, tt.expectedErr) - return - } - }) - } -} - func TestNodeAdmission_ValidateUpdateHybridOverlay(t *testing.T) { adm := NewNodeAdmissionWebhook(false, true) tests := []struct { @@ -502,64 +461,3 @@ func TestNodeAdmission_ValidateUpdateHybridOverlay(t *testing.T) { }) } } - -func TestNodeAdmission_ValidateUpdateExtraUsers(t *testing.T) { - extraUser := "system:serviceaccount:ovnkube-cluster-manager" - adm := NewNodeAdmissionWebhook(true, false, extraUser) - tests := []struct { - name string - ctx context.Context - oldObj runtime.Object - newObj runtime.Object - expectedErr error - }{ - { - name: "extra user cannot set util.OvnNodeMigratedZoneName to anything else than ", - ctx: 
admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: extraUser, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: "global"}, - }, - }, - expectedErr: fmt.Errorf("user: %q is not allowed to set %s on node %q: %s can only be set to %s, it cannot be removed", extraUser, util.OvnNodeMigratedZoneName, nodeName, util.OvnNodeMigratedZoneName, nodeName), - }, - { - name: "extra user can set util.OvnNodeMigratedZoneName to ", - ctx: admission.NewContextWithRequest(context.TODO(), admission.Request{ - AdmissionRequest: v1.AdmissionRequest{UserInfo: authenticationv1.UserInfo{ - Username: extraUser, - }}, - }), - oldObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - }, - }, - newObj: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Annotations: map[string]string{util.OvnNodeMigratedZoneName: nodeName}, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := adm.ValidateUpdate(tt.ctx, tt.oldObj, tt.newObj) - if err != tt.expectedErr && err.Error() != tt.expectedErr.Error() { - t.Errorf("ValidateUpdateIC() error = %v, wantErr %v", err, tt.expectedErr) - return - } - }) - } -} diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 1b26e7b9d5..fc979dec24 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -252,6 +252,11 @@ const ( NetworkRoleInfrastructure = "infrastructure-locked" NetworkRoleNone = "none" + // Network transport types - canonical format (lowercase) + NetworkTransportGeneve = "geneve" + NetworkTransportNoOverlay = "no-overlay" + NetworkTransportEVPN = "evpn" + // db index keys // PrimaryIDKey is used as a primary client index PrimaryIDKey = OvnK8sPrefix + "/id" diff --git a/go-controller/pkg/util/dns.go b/go-controller/pkg/util/dns.go index 9466ad16f5..86d8a9e054 100644 --- a/go-controller/pkg/util/dns.go +++ b/go-controller/pkg/util/dns.go @@ -16,8 +16,12 @@ import ( ) const ( - // defaultTTL is used if an invalid or zero TTL is provided. 
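Before the dns.go hunk resumes: a sketch of the TTL normalization implied by the new min/max constants — normalizeTTL is a hypothetical helper, the real change inlines this logic in getIPsAndMinTTL, and the 5s/2m values are copied from the hunk:

package main

import (
	"fmt"
	"time"
)

// Assumed values, taken from the dns.go hunk.
const (
	defaultMinTTL = 5 * time.Second
	defaultMaxTTL = 2 * time.Minute
)

// normalizeTTL converts a TTL in seconds from a DNS answer into the refresh
// interval the resolver will actually use: a zero TTL (a valid "do not cache"
// answer from some DNS servers) is replaced with defaultMinTTL so lookups are
// not issued in a tight loop.
func normalizeTTL(seconds uint32) time.Duration {
	if seconds == 0 {
		return defaultMinTTL
	}
	return time.Duration(seconds) * time.Second
}

func main() {
	fmt.Println(normalizeTTL(0))   // 5s
	fmt.Println(normalizeTTL(300)) // 5m0s
}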
- defaultTTL = 30 * time.Minute + // defaultMinTTL is the minimum TTL value that will be used for a domain name if an invalid or zero TTL is found + defaultMinTTL = 5 * time.Second + // defaultMaxTTL is the maximum TTL value that will be used for a domain name if an invalid or zero TTL is found + defaultMaxTTL = 2 * time.Minute + // maxRetryBeforeBackoff is the maximum number of times to retry a DNS lookup before exponential backoff starts + maxRetryBeforeBackoff = 10 ) type dnsValue struct { @@ -27,6 +31,8 @@ type dnsValue struct { ttl time.Duration // Holds (last dns lookup time + ttl), tells when to refresh IPs next time nextQueryTime time.Time + // Number of times the DNS lookup has been retried before backoff starts + retryCount int } type DNS struct { @@ -105,11 +111,22 @@ func (d *DNS) updateOne(dns string) (bool, error) { return false, fmt.Errorf("DNS value not found in dnsMap for domain: %q", dns) } - ips, ttl, err := d.getIPsAndMinTTL(dns) - if err != nil { - res.nextQueryTime = time.Now().Add(defaultTTL) - d.dnsMap[dns] = res - return false, err + ips, ttl, retry, err := d.getIPsAndMinTTL(dns) + if retry { + // If the DNS lookup has already been retried maxRetryBeforeBackoff times, use exponential backoff + // by doubling the previous TTL. The TTL is capped at defaultMaxTTL. + if res.retryCount >= maxRetryBeforeBackoff { + ttl = min(res.ttl*2, defaultMaxTTL) + } else { + // Increment the retry count + res.retryCount++ + } + // If no valid IPs were found, use the previous IPs as fallback. + if len(ips) == 0 { + ips = res.ips + } + } else { + res.retryCount = 0 } changed := false @@ -120,10 +137,10 @@ func (d *DNS) updateOne(dns string) (bool, error) { res.ttl = ttl res.nextQueryTime = time.Now().Add(res.ttl) d.dnsMap[dns] = res - return changed, nil + return changed, err } -func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, error) { +func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, bool, error) { ips := []net.IP{} ttlSet := false var ttlSeconds uint32 @@ -197,19 +214,27 @@ func (d *DNS) getIPsAndMinTTL(domain string) ([]net.IP, time.Duration, error) { } if !ttlSet || (len(ips) == 0) { - return nil, defaultTTL, fmt.Errorf("IPv4 or IPv6 addr not found for domain: %q, nameservers: %v", domain, d.nameservers) + return nil, defaultMinTTL, true, fmt.Errorf("IPv4 or IPv6 addr not found for domain: %q, nameservers: %v", domain, d.nameservers) } + ips = removeDuplicateIPs(ips) + ttl, err := time.ParseDuration(fmt.Sprintf("%ds", minTTL)) if err != nil { - utilruntime.HandleError(fmt.Errorf("invalid TTL value for domain: %q, err: %v, defaulting ttl=%s", domain, err, defaultTTL.String())) - ttl = defaultTTL + utilruntime.HandleError(fmt.Errorf("invalid TTL value for domain: %q, err: %v", domain, err)) + return ips, defaultMinTTL, true, nil } if ttl == 0 { - ttl = defaultTTL + // If the TTL is 0, return the default minimum TTL. The retry flag is set to false as this + // is not an error scenario. A TTL of 0 is valid for some DNS servers + // and means that the IP addresses should be refreshed every time the DNS + // name is used. From the point of view of OVN-Kubernetes, the IP addresses are + // refreshed every defaultMinTTL. 
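The retry policy in updateOne above, distilled into a runnable sketch (constants copied from the hunk; nextInterval is a hypothetical name): the first maxRetryBeforeBackoff failures keep the minimum TTL, after which the interval doubles up to defaultMaxTTL.

package main

import (
	"fmt"
	"time"
)

const (
	defaultMinTTL         = 5 * time.Second
	defaultMaxTTL         = 2 * time.Minute
	maxRetryBeforeBackoff = 10
)

// nextInterval returns the refresh interval after another failed lookup:
// below the retry threshold it stays at prev (defaultMinTTL in practice),
// after the threshold it doubles, capped at defaultMaxTTL.
func nextInterval(prev time.Duration, retryCount int) time.Duration {
	if retryCount < maxRetryBeforeBackoff {
		return prev
	}
	return min(prev*2, defaultMaxTTL) // min builtin, Go 1.21+
}

func main() {
	ttl, retries := defaultMinTTL, 0
	for i := 1; i <= 15; i++ {
		ttl = nextInterval(ttl, retries)
		if retries < maxRetryBeforeBackoff {
			retries++
		}
		fmt.Printf("failure %2d -> ttl %v\n", i, ttl)
	}
}

With the hunk's values this yields 5s for the first ten failures, then 10s, 20s, 40s, 80s, and the 2m cap from then on — matching the expectations in the TestUpdateOne cases below.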
+ klog.V(5).Infof("TTL value is 0 for domain: %q, defaulting ttl=%s", domain, defaultMinTTL.String()) + return ips, defaultMinTTL, false, nil } - return removeDuplicateIPs(ips), ttl, nil + return ips, ttl, false, nil } func (d *DNS) GetNextQueryTime() (time.Time, string, bool) { diff --git a/go-controller/pkg/util/dns_test.go b/go-controller/pkg/util/dns_test.go index a9d248042b..9f40c176ba 100644 --- a/go-controller/pkg/util/dns_test.go +++ b/go-controller/pkg/util/dns_test.go @@ -70,13 +70,16 @@ func TestGetIPsAndMinTTL(t *testing.T) { tests := []struct { desc string errExp bool + retry bool ipv4Mode bool ipv6Mode bool dnsOpsMockHelper []ovntest.TestifyMockHelper + expectedTTL time.Duration }{ { desc: "call to Exchange fails IPv4 only", errExp: true, + retry: true, ipv4Mode: true, ipv6Mode: false, dnsOpsMockHelper: []ovntest.TestifyMockHelper{ @@ -89,10 +92,12 @@ func TestGetIPsAndMinTTL(t *testing.T) { CallTimes: 1, }, }, + expectedTTL: defaultMinTTL, }, { desc: "Exchange returns correctly but Rcode != RcodeSuccess IPv4 only", errExp: true, + retry: true, ipv4Mode: true, ipv6Mode: false, dnsOpsMockHelper: []ovntest.TestifyMockHelper{ @@ -105,6 +110,46 @@ func TestGetIPsAndMinTTL(t *testing.T) { CallTimes: 1, }, }, + expectedTTL: defaultMinTTL, + }, + { + desc: "Exchange returns correctly but with TTL 0 IPv4 only", + errExp: false, + retry: false, + ipv4Mode: true, + ipv6Mode: false, + dnsOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1}, + {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1}, + {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{A: net.ParseIP("1.2.3.4")}}}, 0 * time.Second, nil}, CallTimes: 1}, + }, + expectedTTL: defaultMinTTL, + }, + { + desc: "Exchange returns correctly but no Answer IPv4 only", + errExp: true, + retry: true, + ipv4Mode: true, + ipv6Mode: false, + dnsOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1}, + {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1}, + {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{}}, 0 * time.Second, nil}, CallTimes: 1}, + }, + expectedTTL: defaultMinTTL, + }, + { + desc: "Exchange returns correctly but with non-zero TTL IPv4 only", + errExp: false, + retry: false, + ipv4Mode: true, + ipv6Mode: false, + dnsOpsMockHelper: []ovntest.TestifyMockHelper{ + {OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1}, + {OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{"www.test.com"}, CallTimes: 1}, + {OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{Hdr: dns.RR_Header{Ttl: 100}, A: net.ParseIP("1.2.3.4")}}}, 0 * time.Second, 
nil}, CallTimes: 1}, + }, + expectedTTL: 100 * time.Second, }, } @@ -128,19 +173,22 @@ func TestGetIPsAndMinTTL(t *testing.T) { } config.IPv4Mode = tc.ipv4Mode config.IPv6Mode = tc.ipv6Mode - res, _, err := testDNS.getIPsAndMinTTL("www.test.com") - t.Log(res, err) + res, ttl, retry, err := testDNS.getIPsAndMinTTL("www.test.com") + t.Log(res, ttl, retry, err) if tc.errExp { require.Error(t, err) } else { require.NoError(t, err) } + assert.Equal(t, tc.retry, retry, "the exponentialBackoff variable should match the return from dns.getIPsAndMinTTL()") + assert.Equal(t, tc.expectedTTL, ttl, "the ttl variable should match the return from dns.getIPsAndMinTTL()") mockDNSOps.AssertExpectations(t) }) } } func TestUpdate(t *testing.T) { + config.IPv4Mode = true mockDNSOps := new(util_mocks.DNSOps) SetDNSLibOpsMockInst(mockDNSOps) @@ -252,6 +300,7 @@ func TestUpdate(t *testing.T) { } func TestAdd(t *testing.T) { + config.IPv4Mode = true dnsName := "www.testing.com" mockDNSOps := new(util_mocks.DNSOps) SetDNSLibOpsMockInst(mockDNSOps) @@ -319,3 +368,211 @@ func TestAdd(t *testing.T) { } } + +func TestIPsEqual(t *testing.T) { + tests := []struct { + desc string + oldips []net.IP + newips []net.IP + expEqual bool + }{ + { + desc: "oldips and newips are the same", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.4")}, + expEqual: true, + }, + { + desc: "oldips and newips are different", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips and newips are different length", + oldips: []net.IP{net.ParseIP("1.2.3.4")}, + newips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is nil and newips is not nil", + oldips: nil, + newips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is empty and newips is not empty", + oldips: []net.IP{}, + newips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + expEqual: false, + }, + { + desc: "oldips is not nil and newips is nil", + oldips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + newips: nil, + expEqual: false, + }, + { + desc: "oldips is not empty and newips is empty", + oldips: []net.IP{net.ParseIP("1.2.3.4"), net.ParseIP("1.2.3.5")}, + newips: []net.IP{}, + expEqual: false, + }, + { + desc: "oldips and newips are both nil", + oldips: nil, + newips: nil, + expEqual: true, + }, + { + desc: "oldips and newips are both empty", + oldips: []net.IP{}, + newips: []net.IP{}, + expEqual: true, + }, + { + desc: "oldips is nil and newips is empty", + oldips: nil, + newips: []net.IP{}, + expEqual: true, + }, + { + desc: "oldips is empty and newips is nil", + oldips: []net.IP{}, + newips: nil, + expEqual: true, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + res := ipsEqual(tc.oldips, tc.newips) + assert.Equal(t, tc.expEqual, res) + }) + } +} + +func TestUpdateOne(t *testing.T) { + config.IPv4Mode = true + dnsName := "www.testing.com" + newIP := net.ParseIP("1.2.3.4") + fqdnOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Fqdn", OnCallMethodArgType: []string{"string"}, RetArgList: []interface{}{dnsName}, CallTimes: 1, + } + setQuestionOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "SetQuestion", OnCallMethodArgType: []string{"*dns.Msg", "string", "uint16"}, RetArgList: []interface{}{&dns.Msg{}}, CallTimes: 1, + } + exchangeSuccessNoAnswerOpsMockHelper := 
ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeSuccessZeroTTLOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{A: newIP}}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeSuccessNonZeroTTLOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeSuccess}, Answer: []dns.RR{&dns.A{Hdr: dns.RR_Header{Ttl: 100}, A: newIP}}}, 0 * time.Second, nil}, CallTimes: 1, + } + exchangeFailureOpsMockHelper := ovntest.TestifyMockHelper{ + OnCallMethodName: "Exchange", OnCallMethodArgType: []string{"*dns.Client", "*dns.Msg", "string"}, RetArgList: []interface{}{&dns.Msg{MsgHdr: dns.MsgHdr{Rcode: dns.RcodeServerFailure}}, 0 * time.Second, nil}, CallTimes: 1, + } + tests := []struct { + desc string + numCalls int + exchangeOpsMockHelper ovntest.TestifyMockHelper + expTTL time.Duration + }{ + { + desc: "when Exchange function returns with Rcode != RcodeSuccess, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns successfully but without Answer, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeSuccessNoAnswerOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL returned is 0 by Exchange function, defaultMinTTL is used", + numCalls: 1, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL returned is 0 by Exchange function 2 times, defaultMinTTL is used", + numCalls: 2, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when TTL returned is 0 by Exchange function 11 times, defaultMinTTL is used", + numCalls: 11, + exchangeOpsMockHelper: exchangeSuccessZeroTTLOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess twice, defaultMinTTL is used", + numCalls: 2, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 10 times, defaultMinTTL is used", + numCalls: 10, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 11 times, defaultMinTTL is doubled", + numCalls: 11, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: 2 * defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 14 times, 16 (2^4) times defaultMinTTL is used", + numCalls: 14, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: 16 * defaultMinTTL, + }, + { + desc: "when Exchange function returns with Rcode != RcodeSuccess 15 times, defaultMaxTTL is used", + numCalls: 15, + exchangeOpsMockHelper: exchangeFailureOpsMockHelper, + expTTL: defaultMaxTTL, + }, + { + desc: "when TTL returned is non-zero by Exchange function, it is used", + numCalls: 1, + exchangeOpsMockHelper: 
exchangeSuccessNonZeroTTLOpsMockHelper, + expTTL: 100 * time.Second, + }, + } + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + mockDNSOps := new(util_mocks.DNSOps) + SetDNSLibOpsMockInst(mockDNSOps) + dnsOpsMockHelper := []ovntest.TestifyMockHelper{fqdnOpsMockHelper, setQuestionOpsMockHelper, tc.exchangeOpsMockHelper} + for index := 0; index < tc.numCalls; index++ { + for _, item := range dnsOpsMockHelper { + call := mockDNSOps.On(item.OnCallMethodName) + for _, arg := range item.OnCallMethodArgType { + call.Arguments = append(call.Arguments, mock.AnythingOfType(arg)) + } + for _, ret := range item.RetArgList { + call.ReturnArguments = append(call.ReturnArguments, ret) + } + call.Once() + } + } + dns := DNS{ + dnsMap: make(map[string]dnsValue), + nameservers: []string{"1.1.1.1"}, + } + dns.dnsMap[dnsName] = dnsValue{} + for i := 0; i < tc.numCalls; i++ { + _, _ = dns.updateOne(dnsName) + } + assert.Equal(t, tc.expTTL, dns.dnsMap[dnsName].ttl) + mockDNSOps.AssertExpectations(t) + }) + } +} diff --git a/go-controller/pkg/util/fake_client.go b/go-controller/pkg/util/fake_client.go index 0ca981e849..e78010d572 100644 --- a/go-controller/pkg/util/fake_client.go +++ b/go-controller/pkg/util/fake_client.go @@ -39,6 +39,8 @@ import ( routeadvertisementsfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned/fake" udnv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" udnfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake" + vtepv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1" + vtepfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned/fake" ) func GetOVNClientset(objects ...runtime.Object) *OVNClientset { @@ -58,6 +60,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { raObjects := []runtime.Object{} frrObjects := []runtime.Object{} networkConnectObjects := []runtime.Object{} + vtepObjects := []runtime.Object{} for _, object := range objects { switch object.(type) { case *egressip.EgressIP: @@ -90,6 +93,8 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { networkQoSObjects = append(networkQoSObjects, object) case *networkconnect.ClusterNetworkConnect: networkConnectObjects = append(networkConnectObjects, object) + case *vtepv1.VTEP: + vtepObjects = append(vtepObjects, object) default: v1Objects = append(v1Objects, object) } @@ -119,6 +124,7 @@ func GetOVNClientset(objects ...runtime.Object) *OVNClientset { FRRClient: frrfake.NewSimpleClientset(frrObjects...), NetworkQoSClient: networkqosfake.NewSimpleClientset(networkQoSObjects...), NetworkConnectClient: networkconnectfake.NewSimpleClientset(networkConnectObjects...), + VTEPClient: vtepfake.NewSimpleClientset(vtepObjects...), } } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index ad551442ab..b5e314d315 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -54,6 +54,7 @@ import ( networkqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/networkqos/v1alpha1/apis/clientset/versioned" routeadvertisementsclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/routeadvertisements/v1/apis/clientset/versioned" userdefinednetworkclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned" + vtepclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/vtep/v1/apis/clientset/versioned" ) // OVNClientset is a wrapper around all clientsets used by OVN-Kubernetes @@ -75,6 +76,7 @@ type OVNClientset struct { RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface NetworkQoSClient networkqosclientset.Interface + VTEPClient vtepclientset.Interface } // OVNMasterClientset @@ -95,6 +97,7 @@ type OVNMasterClientset struct { RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface NetworkQoSClient networkqosclientset.Interface + VTEPClient vtepclientset.Interface } // OVNKubeControllerClientset @@ -143,6 +146,7 @@ type OVNClusterManagerClientset struct { RouteAdvertisementsClient routeadvertisementsclientset.Interface FRRClient frrclientset.Interface NetworkQoSClient networkqosclientset.Interface + VTEPClient vtepclientset.Interface } const ( @@ -173,6 +177,7 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { RouteAdvertisementsClient: cs.RouteAdvertisementsClient, FRRClient: cs.FRRClient, NetworkQoSClient: cs.NetworkQoSClient, + VTEPClient: cs.VTEPClient, } } @@ -233,6 +238,7 @@ func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset RouteAdvertisementsClient: cs.RouteAdvertisementsClient, FRRClient: cs.FRRClient, NetworkQoSClient: cs.NetworkQoSClient, + VTEPClient: cs.VTEPClient, } } @@ -547,6 +553,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + vtepClientset, err := vtepclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + return &OVNClientset{ KubeClient: kclientset, ANPClient: anpClientset, @@ -565,6 +576,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { RouteAdvertisementsClient: routeAdvertisementsClientset, FRRClient: frrClientset, NetworkQoSClient: networkqosClientset, + VTEPClient: vtepClientset, }, nil } diff --git a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go index f382625105..edaef77470 100644 --- a/go-controller/pkg/util/mocks/multinetwork/NetInfo.go +++ b/go-controller/pkg/util/mocks/multinetwork/NetInfo.go @@ -52,6 +52,24 @@ func (_m *NetInfo) EVPNIPVRFRouteTarget() string { return r0 } +// EVPNIPVRFVID provides a mock function with no fields +func (_m *NetInfo) EVPNIPVRFVID() int { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for EVPNIPVRFVID") + } + + var r0 int + if rf, ok := ret.Get(0).(func() int); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int) + } + + return r0 +} + // EVPNIPVRFVNI provides a mock function with no fields func (_m *NetInfo) EVPNIPVRFVNI() int32 { ret := _m.Called() @@ -88,6 +106,24 @@ func (_m *NetInfo) EVPNMACVRFRouteTarget() string { return r0 } +// EVPNMACVRFVID provides a mock function with no fields +func (_m *NetInfo) EVPNMACVRFVID() int { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for EVPNMACVRFVID") + } + + var r0 int + if rf, ok := ret.Get(0).(func() int); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int) + } + + return r0 +} + // EVPNMACVRFVNI provides a mock function with no fields func (_m *NetInfo) EVPNMACVRFVNI() int32 { ret := _m.Called() diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 30a8bc4eef..878f8666cd 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -60,8 
+60,10 @@ type NetInfo interface { EVPNVTEPName() string EVPNMACVRFVNI() int32 EVPNMACVRFRouteTarget() string + EVPNMACVRFVID() int EVPNIPVRFVNI() int32 EVPNIPVRFRouteTarget() string + EVPNIPVRFVID() int GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet GetNodeManagementIP(hostSubnet *net.IPNet) *net.IPNet @@ -459,6 +461,8 @@ func (nInfo *mutableNetInfo) getNamespaces() sets.Set[string] { } func (nInfo *mutableNetInfo) GetNADNamespaces() []string { + nInfo.RLock() + defer nInfo.RUnlock() return nInfo.getNamespaces().UnsortedList() } @@ -658,7 +662,7 @@ func (nInfo *DefaultNetInfo) PhysicalNetworkName() string { // Transport returns the transport protocol for east-west traffic func (nInfo *DefaultNetInfo) Transport() string { - return "" + return config.Default.Transport } // EVPNVTEPName returns empty as EVPN is not supported on the default network @@ -686,6 +690,16 @@ func (nInfo *DefaultNetInfo) EVPNIPVRFRouteTarget() string { return "" } +// EVPNMACVRFVID returns 0 as EVPN is not supported on the default network +func (nInfo *DefaultNetInfo) EVPNMACVRFVID() int { + return 0 +} + +// EVPNIPVRFVID returns 0 as EVPN is not supported on the default network +func (nInfo *DefaultNetInfo) EVPNIPVRFVID() int { + return 0 +} + func (nInfo *DefaultNetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet { return GetNodeGatewayIfAddr(hostSubnet) } @@ -853,6 +867,9 @@ func (nInfo *userDefinedNetInfo) PhysicalNetworkName() string { // Transport returns the transport protocol for east-west traffic func (nInfo *userDefinedNetInfo) Transport() string { + if nInfo.transport == "" { + return types.NetworkTransportGeneve + } return nInfo.transport } @@ -896,6 +913,22 @@ func (nInfo *userDefinedNetInfo) EVPNIPVRFRouteTarget() string { return nInfo.evpn.IPVRF.RouteTarget } +// EVPNMACVRFVID returns the MAC-VRF VID for EVPN +func (nInfo *userDefinedNetInfo) EVPNMACVRFVID() int { + if nInfo.evpn == nil || nInfo.evpn.MACVRF == nil { + return 0 + } + return nInfo.evpn.MACVRF.VID +} + +// EVPNIPVRFVID returns the IP-VRF VID for EVPN +func (nInfo *userDefinedNetInfo) EVPNIPVRFVID() int { + if nInfo.evpn == nil || nInfo.evpn.IPVRF == nil { + return 0 + } + return nInfo.evpn.IPVRF.VID +} + func (nInfo *userDefinedNetInfo) GetNodeGatewayIP(hostSubnet *net.IPNet) *net.IPNet { if IsPreconfiguredUDNAddressesEnabled() && nInfo.TopologyType() == types.Layer2Topology && nInfo.IsPrimaryNetwork() { isIPV6 := knet.IsIPv6CIDR(hostSubnet) @@ -1009,7 +1042,7 @@ func (nInfo *userDefinedNetInfo) canReconcile(other NetInfo) bool { if nInfo.physicalNetworkName != other.PhysicalNetworkName() { return false } - if nInfo.transport != other.Transport() { + if nInfo.Transport() != other.Transport() { return false } if nInfo.EVPNVTEPName() != other.EVPNVTEPName() { @@ -1503,6 +1536,18 @@ func ValidateNetConf(nadName string, netconf *ovncnitypes.NetConf) error { return fmt.Errorf("error parsing Network Attachment Definition %s: %w", nadName, ErrorUnsupportedIPAMKey) } + // Validate transport if specified + if netconf.Transport != "" && + netconf.Transport != types.NetworkTransportGeneve && + netconf.Transport != types.NetworkTransportNoOverlay && + netconf.Transport != types.NetworkTransportEVPN { + return fmt.Errorf("invalid transport %q: must be one of %q", netconf.Transport, []string{ + types.NetworkTransportGeneve, + types.NetworkTransportNoOverlay, + types.NetworkTransportEVPN, + }) + } + if netconf.JoinSubnet != "" && netconf.Topology == types.LocalnetTopology { return fmt.Errorf("localnet topology does not allow specifying 
join-subnet as services are not supported") } diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index 2e8c98b29b..2861650ba8 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -1829,6 +1829,13 @@ func TestAreNetworksCompatible(t *testing.T) { expectedResult: false, expectationDescription: "we should reconcile on physical network name updates", }, + { + desc: "empty transport and explicit geneve transport should be compatible", + aNetwork: &userDefinedNetInfo{transport: ""}, + anotherNetwork: &userDefinedNetInfo{transport: "geneve"}, + expectedResult: true, + expectationDescription: "an empty transport defaults to geneve, so the networks should be compatible", + }, } for _, test := range tests { @@ -2005,8 +2012,10 @@ func TestEVPNConfig(t *testing.T) { expectedVTEPName string expectedMACVRFVNI int32 expectedMACVRFRouteTarget string + expectedMACVRFVID int expectedIPVRFVNI int32 expectedIPVRFRouteTarget string + expectedIPVRFVID int } tests := []testConfig{ @@ -2016,7 +2025,7 @@ { NetConf: cnitypes.NetConf{Name: ovntypes.DefaultNetworkName}, Topology: ovntypes.Layer3Topology, }, - expectedTransport: "", + expectedTransport: "geneve", expectedVTEPName: "", expectedMACVRFVNI: 0, expectedMACVRFRouteTarget: "", @@ -2029,7 +2038,7 @@ { NetConf: cnitypes.NetConf{Name: "l3-network"}, Topology: ovntypes.Layer3Topology, }, - expectedTransport: "", + expectedTransport: "geneve", expectedVTEPName: "", expectedMACVRFVNI: 0, expectedMACVRFRouteTarget: "", @@ -2104,18 +2113,33 @@ { - desc: "layer2 network with nooverlay transport", + desc: "layer2 network with EVPN transport including VIDs (allocated by controller)", inputNetConf: &ovncnitypes.NetConf{ - NetConf: cnitypes.NetConf{Name: "nooverlay-network"}, + NetConf: cnitypes.NetConf{Name: "evpn-with-vids"}, Topology: ovntypes.Layer2Topology, - Transport: "nooverlay", + Transport: "evpn", + EVPN: &ovncnitypes.EVPNConfig{ + VTEP: "vid-vtep", + MACVRF: &ovncnitypes.VRFConfig{ + VNI: 100, + RouteTarget: "65000:100", + VID: 12, + }, + IPVRF: &ovncnitypes.VRFConfig{ + VNI: 1000, + RouteTarget: "65000:1000", + VID: 13, + }, + }, }, - expectedTransport: "nooverlay", - expectedVTEPName: "", - expectedMACVRFVNI: 0, - expectedMACVRFRouteTarget: "", - expectedIPVRFVNI: 0, - expectedIPVRFRouteTarget: "", + expectedTransport: "evpn", + expectedVTEPName: "vid-vtep", + expectedMACVRFVNI: 100, + expectedMACVRFRouteTarget: "65000:100", + expectedMACVRFVID: 12, + expectedIPVRFVNI: 1000, + expectedIPVRFRouteTarget: "65000:1000", + expectedIPVRFVID: 13, }, { desc: "EVPN config with VNI only (no route target)", @@ -2149,8 +2173,10 @@ g.Expect(netInfo.EVPNVTEPName()).To(gomega.Equal(test.expectedVTEPName), "VTEP name mismatch") g.Expect(netInfo.EVPNMACVRFVNI()).To(gomega.Equal(test.expectedMACVRFVNI), "MAC-VRF VNI mismatch") g.Expect(netInfo.EVPNMACVRFRouteTarget()).To(gomega.Equal(test.expectedMACVRFRouteTarget), "MAC-VRF RouteTarget mismatch") + g.Expect(netInfo.EVPNMACVRFVID()).To(gomega.Equal(test.expectedMACVRFVID), "MAC-VRF VID mismatch") g.Expect(netInfo.EVPNIPVRFVNI()).To(gomega.Equal(test.expectedIPVRFVNI), "IP-VRF VNI mismatch") g.Expect(netInfo.EVPNIPVRFRouteTarget()).To(gomega.Equal(test.expectedIPVRFRouteTarget), "IP-VRF RouteTarget mismatch") +
g.Expect(netInfo.EVPNIPVRFVID()).To(gomega.Equal(test.expectedIPVRFVID), "IP-VRF VID mismatch") }) } } @@ -2173,7 +2199,7 @@ func TestEVPNNetworkCompatibility(t *testing.T) { { desc: "different transport should not be compatible", aNetwork: &userDefinedNetInfo{transport: "evpn"}, - anotherNetwork: &userDefinedNetInfo{transport: "nooverlay"}, + anotherNetwork: &userDefinedNetInfo{transport: "no-overlay"}, expectedResult: false, expectationDescription: "networks with different transport should not be compatible", }, diff --git a/go-controller/pkg/util/node_annotations.go b/go-controller/pkg/util/node_annotations.go index c11bc53e13..7f9af894a1 100644 --- a/go-controller/pkg/util/node_annotations.go +++ b/go-controller/pkg/util/node_annotations.go @@ -117,22 +117,6 @@ const ( // ovnkube-node gets the node's zone from the OVN Southbound database. OvnNodeZoneName = "k8s.ovn.org/zone-name" - /** HACK BEGIN **/ - // TODO(tssurya): Remove this annotation a few months from now (when one or two release jump - // upgrades are done). This has been added only to minimize disruption for upgrades when - // moving to interconnect=true. - // We want the legacy ovnkube-master to wait for remote ovnkube-node to - // signal it using "k8s.ovn.org/remote-zone-migrated" annotation before - // considering a node as remote when we upgrade from "global" (1 zone IC) - // zone to multi-zone. This is so that network disruption for the existing workloads - // is negligible and until the point where ovnkube-node flips the switch to connect - // to the new SBDB, it would continue talking to the legacy RAFT ovnkube-sbdb to ensure - // OVN/OVS flows are intact. - // OvnNodeMigratedZoneName is the zone to which the node belongs to. It is set by ovnkube-node. - // ovnkube-node gets the node's zone from the OVN Southbound database. - OvnNodeMigratedZoneName = "k8s.ovn.org/remote-zone-migrated" - /** HACK END **/ - // OvnTransitSwitchPortAddr is the annotation to store the node Transit switch port ips. // It is set by cluster manager. OvnTransitSwitchPortAddr = "k8s.ovn.org/node-transit-switch-port-ifaddr" @@ -1164,26 +1148,6 @@ func SetNodeZone(nodeAnnotator kube.Annotator, zoneName string) error { return nodeAnnotator.Set(OvnNodeZoneName, zoneName) } -/** HACK BEGIN **/ -// TODO(tssurya): Remove this a few months from now -// SetNodeZoneMigrated sets the node's zone in the 'ovnNodeMigratedZoneName' node annotation. -func SetNodeZoneMigrated(nodeAnnotator kube.Annotator, zoneName string) error { - return nodeAnnotator.Set(OvnNodeMigratedZoneName, zoneName) -} - -// HasNodeMigratedZone returns true if node has its ovnNodeMigratedZoneName set already -func HasNodeMigratedZone(node *corev1.Node) bool { - _, ok := node.Annotations[OvnNodeMigratedZoneName] - return ok -} - -// NodeMigratedZoneAnnotationChanged returns true if the ovnNodeMigratedZoneName annotation changed for the node -func NodeMigratedZoneAnnotationChanged(oldNode, newNode *corev1.Node) bool { - return oldNode.Annotations[OvnNodeMigratedZoneName] != newNode.Annotations[OvnNodeMigratedZoneName] -} - -/** HACK END **/ - // GetNodeZone returns the zone of the node set in the 'ovnNodeZoneName' node annotation. // If the annotation is not set, it returns the 'default' zone name. 
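// For example, an illustrative sketch of the documented behavior (not the body of this
// function; the concrete default zone name is an assumption, not taken from this patch):
//
//	zone, ok := node.Annotations[OvnNodeZoneName] // "k8s.ovn.org/zone-name"
//	if !ok {
//		zone = "global" // assumed default; the real constant lives in the types package
//	}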
func GetNodeZone(node *corev1.Node) string { diff --git a/helm/ovn-kubernetes/Chart.yaml b/helm/ovn-kubernetes/Chart.yaml index 4e6ef123ca..9407621aff 100644 --- a/helm/ovn-kubernetes/Chart.yaml +++ b/helm/ovn-kubernetes/Chart.yaml @@ -60,3 +60,7 @@ dependencies: version: 1.2.0 tags: - ovs-node + - name: ovnkube-single-node-zone-dpu + version: 1.2.0 + tags: + - ovnkube-single-node-zone-dpu diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index fd9899c32b..5698797434 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -66,6 +66,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -130,6 +133,10 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE @@ -140,6 +147,8 @@ spec: value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_HYBRID_OVERLAY_NET_CIDR value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }} - name: OVN_DISABLE_SNAT_MULTIPLE_GWS @@ -196,5 +205,8 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml index 4a62d3e661..45c801fa92 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/rbac-ovnkube-cluster-manager.yaml @@ -76,6 +76,8 @@ rules: - networkqoses - userdefinednetworks - clusteruserdefinednetworks + - routeadvertisements + - vteps verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.ovn.org"] resources: @@ -87,6 +89,7 @@ rules: - clusteruserdefinednetworks - clusteruserdefinednetworks/status - clusteruserdefinednetworks/finalizers + - routeadvertisements/status verbs: [ "patch", "update" ] - apiGroups: [""] resources: @@ -127,3 +130,9 @@ rules: - dnsnameresolvers verbs: [ "create", "delete", "list", "patch", "update", "watch" ] {{- end }} + {{- if eq (hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false) true }} + - apiGroups: ["frrk8s.metallb.io"] + resources: + - frrconfigurations + verbs: [ "create", "delete", "get", "list", "patch", "update", 
"watch" ] + {{- end }} diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml index 62849f7af7..df2a7a1d0f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml @@ -74,6 +74,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -147,6 +150,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true {{- if eq (hasKey .Values.global "enableCompactMode" | ternary .Values.global.enableCompactMode false) true }} # Common mounts # for the iptables wrapper @@ -240,12 +246,18 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} - name: OVN_DYNAMIC_UDN_ALLOCATION value: {{ hasKey .Values.global "enableDynamicUDNAllocation" | ternary .Values.global.enableDynamicUDNAllocation false | quote }} - name: OVN_DYNAMIC_UDN_GRACE_PERIOD value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_EGRESSSERVICE_ENABLE value: {{ default "" .Values.global.enableEgressService | quote }} - name: OVN_HYBRID_OVERLAY_NET_CIDR @@ -316,6 +328,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config {{- if eq (hasKey .Values.global "enableCompactMode" | ternary .Values.global.enableCompactMode false) true }} - name: host-slash hostPath: diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml index 7474c69f8f..cb884a9bea 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/rbac-ovnkube-master.yaml @@ -87,6 +87,7 @@ rules: - userdefinednetworks - clusteruserdefinednetworks - networkqoses + - vteps verbs: [ "get", "list", "watch" ] - apiGroups: ["k8s.cni.cncf.io"] resources: diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml index a7a3ebc4c9..f009f7036f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/templates/ovnkube-node-dpu-host.yaml @@ -97,10 +97,25 @@ spec: # ovnkube-node dpu-host mounts - mountPath: /var/run/ovn name: var-run-ovn + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + {{- if 
.Values.global.enableNetworkSegmentation }} + - mountPath: /var/run/k8s.cni.cncf.io/devinfo/dp + name: host-devinfo-dp + readOnly: true + {{- end }} resources: requests: cpu: 100m memory: 300Mi + {{- if and (.Values.global.enableNetworkSegmentation) (.Values.mgmtPortVFResourceName) (.Values.mgmtPortVFsCount) }} + {{ .Values.mgmtPortVFResourceName }}: {{ .Values.mgmtPortVFsCount }} + {{- end }} + limits: + {{- if and (.Values.global.enableNetworkSegmentation) (.Values.mgmtPortVFResourceName) (.Values.mgmtPortVFsCount) }} + {{ .Values.mgmtPortVFResourceName }}: {{ .Values.mgmtPortVFsCount }} + {{- end }} env: {{ if .Values.global.enableCoredumps -}} - name: GOTRACEBACK @@ -210,8 +225,16 @@ value: {{ hasKey .Values.global "enableDynamicUDNAllocation" | ternary .Values.global.enableDynamicUDNAllocation false | quote }} - name: OVN_DYNAMIC_UDN_GRACE_PERIOD value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVNKUBE_NODE_MGMT_PORT_NETDEV - value: {{ default "" .Values.global.nodeMgmtPortNetdev | quote }} + value: {{ default "" .Values.nodeMgmtPortNetdev | quote }} + - name: OVNKUBE_NODE_MGMT_PORT_DP_RESOURCE_NAME + value: {{ default "" .Values.mgmtPortVFResourceName | quote }} + - name: OVN_ENABLE_INTERCONNECT + value: {{ default "false" .Values.global.enableInterconnect | quote }} + - name: OVN_NETWORK_SEGMENTATION_ENABLE + value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: @@ -263,5 +286,13 @@ path: /run/systemd - name: var-run-ovn emptyDir: {} + - name: ovnkube-config + configMap: + name: ovnkube-config + {{- if .Values.global.enableNetworkSegmentation }} + - name: host-devinfo-dp + hostPath: + path: /var/run/k8s.cni.cncf.io/devinfo/dp + {{- end }} tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml index 584127efca..914b06593e 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu-host/values.yaml @@ -2,4 +2,18 @@ logLevel: 4 logFileMaxSize: 100 logFileMaxBackups: 5 logFileMaxAge: 5 -ovnControllerLogLevel: 4 \ No newline at end of file +ovnControllerLogLevel: 4 + +# Either the net device or the device-plugin resource name that provides the pool of +# devices usable for management ports must be specified. +# mgmtPortVFResourceName overrides nodeMgmtPortNetdev if both are specified. + +# The net device to be used for the management port +nodeMgmtPortNetdev: "" + +# The device plugin resource name that has allocated interfaces to be used for management ports +mgmtPortVFResourceName: "" + +# If using UDNs, specify the number of VFs required to handle management ports; this +# depends on the number of primary UDNs required (see the illustrative example below).
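+#
+# For example (illustrative values only; the resource name below is hypothetical), setting
+#
+#   mgmtPortVFResourceName: "example.com/mgmt_vf"
+#   mgmtPortVFsCount: 2
+#
+# makes the ovnkube-node container template render the device-plugin resource into
+# both requests and limits (only when global.enableNetworkSegmentation is enabled):
+#
+#   resources:
+#     requests:
+#       example.com/mgmt_vf: 2
+#     limits:
+#       example.com/mgmt_vf: 2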
+mgmtPortVFsCount: 1 diff --git a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml index dd961de8e7..b1039f6d8f 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node-dpu/templates/ovnkube-node-dpu.yaml @@ -108,6 +108,9 @@ spec: name: run-systemd subPath: private readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -241,6 +244,8 @@ spec: value: {{ hasKey .Values.global "enableDynamicUDNAllocation" | ternary .Values.global.enableDynamicUDNAllocation false | quote }} - name: OVN_DYNAMIC_UDN_GRACE_PERIOD value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: @@ -397,5 +402,8 @@ spec: - name: run-systemd hostPath: path: /run/systemd + - name: ovnkube-config + configMap: + name: ovnkube-config tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml index 659bfa2bab..3a0b5162e4 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-node/templates/ovnkube-node.yaml @@ -98,6 +98,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /etc/openvswitch/ name: host-etc-ovs readOnly: true @@ -231,6 +234,10 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE @@ -241,6 +248,8 @@ spec: value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_ENABLE_INTERCONNECT value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }} - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY @@ -411,5 +420,8 @@ spec: - name: run-systemd hostPath: path: /run/systemd + - name: ovnkube-config + configMap: + name: ovnkube-config tolerations: - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml new file mode 100644 index 0000000000..a956168562 --- /dev/null +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: ovnkube-single-node-zone-dpu +description: Helm chart to deploy single node zone stack on DPUs +type: application +version: 1.2.0 
+appVersion: "1.2.0" diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml new file mode 100644 index 0000000000..ac78168e4f --- /dev/null +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/templates/ovnkube-single-node-zone-dpu.yaml @@ -0,0 +1,575 @@ +# ovnkube-node-dpu +# daemonset version 3 +# starts node daemons for single node zone ovn stack, each in a separate container on DPU +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovnkube-node-dpu + # namespace set up by install + namespace: ovn-kubernetes + annotations: + kubernetes.io/description: | + This DaemonSet launches the ovn-kubernetes networking components on dpus in IC mode. +spec: + selector: + matchLabels: + app: ovnkube-node-dpu + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: ovnkube-node-dpu + name: ovnkube-node-dpu + component: network + type: infra + kubernetes.io/os: "linux" + ovn-db-pod: "true" + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + serviceAccountName: ovnkube-node + hostNetwork: true + dnsPolicy: Default + {{- if eq (hasKey .Values.global "unprivilegedMode" | ternary .Values.global.unprivilegedMode false) false }} + hostPID: true + {{- end }} + containers: + # nb-ovsdb - v3 + - name: nb-ovsdb + image: {{ include "getDPUImage" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "local-nb-ovsdb"] + securityContext: + runAsUser: 0 + capabilities: + add: ["NET_ADMIN"] + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # ovn db is stored in the pod in /etc/openvswitch + # (or in /etc/ovn if OVN from new repository is used) + # and on the host in /var/lib/openvswitch/ + - mountPath: /etc/openvswitch/ + name: host-etc-ovs + - mountPath: /etc/ovn/ + name: host-var-lib-ovs + - mountPath: /var/log/openvswitch/ + name: host-var-log-ovs + - mountPath: /var/log/ovn/ + name: host-var-log-ovs + - mountPath: /ovn-cert + name: host-ovn-cert + readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + - mountPath: /var/run/ovn/ + name: host-var-run-ovs + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_LOGLEVEL_NB + value: {{ default "-vconsole:info -vfile:info" .Values.nbLogLevel | quote }} + - name: OVN_NORTHD_BACKOFF_INTERVAL + value: {{ default "0" .Values.northdBackoffInterval | quote }} + - name: OVN_KUBERNETES_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + readinessProbe: + exec: + command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnnb-db"] + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 60 + # end of nb-ovsdb container + # sb-ovsdb - v3 + - name: sb-ovsdb + image: {{ include "getDPUImage" . 
}} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "local-sb-ovsdb"] + securityContext: + runAsUser: 0 + capabilities: + add: ["NET_ADMIN"] + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # ovn db is stored in the pod in /etc/openvswitch + # (or in /etc/ovn if OVN from new repository is used) + # and on the host in /var/lib/openvswitch/ + - mountPath: /etc/openvswitch/ + name: host-etc-ovs + - mountPath: /etc/ovn/ + name: host-var-lib-ovs + - mountPath: /var/log/openvswitch/ + name: host-var-log-ovs + - mountPath: /var/log/ovn/ + name: host-var-log-ovs + - mountPath: /ovn-cert + name: host-ovn-cert + readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + - mountPath: /var/run/ovn/ + name: host-var-run-ovs + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_LOGLEVEL_SB + value: {{ default "-vconsole:info -vfile:info" .Values.sbLogLevel | quote }} + - name: OVN_KUBERNETES_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OVN_SSL_ENABLE + value: {{ include "isSslEnabled" . | quote }} + readinessProbe: + exec: + command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnsb-db"] + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 60 + # end of sb-ovsdb container + # ovn-northd - v3 + - name: ovn-northd + image: {{ include "getDPUImage" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "run-ovn-northd"] + securityContext: + runAsUser: 0 + capabilities: + add: ["SYS_NICE"] + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # Run directories where we need to be able to access sockets + - mountPath: /var/run/dbus/ + name: host-var-run-dbus + readOnly: true + - mountPath: /var/log/openvswitch/ + name: host-var-log-ovs + - mountPath: /var/log/ovn/ + name: host-var-log-ovs + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + - mountPath: /var/run/ovn/ + name: host-var-run-ovs + - mountPath: /ovn-cert + name: host-ovn-cert + readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_LOGLEVEL_NORTHD + value: {{ default "-vconsole:info -vfile:info" .Values.northdLogLevel | quote }} + - name: OVN_SSL_ENABLE + value: {{ include "isSslEnabled" . | quote }} + - name: OVN_NORTH + value: "local" + - name: OVN_SOUTH + value: "local" + readinessProbe: + exec: + command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-northd"] + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 60 + # end of ovn-northd container + # ovnkube-controller + - name: ovnkube-controller + image: {{ include "getDPUImage" . 
}} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "ovnkube-controller-with-node"] + securityContext: + runAsUser: 0 + {{- if eq (hasKey .Values.global "unprivilegedMode" | ternary .Values.global.unprivilegedMode false) false }} + privileged: true + {{- else }} + capabilities: + add: + - NET_ADMIN + {{- end }} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # Common mounts + # for the iptables wrapper + - mountPath: /host + name: host-slash + readOnly: true + - mountPath: /var/lib/kubelet + name: host-kubelet + readOnly: true + - mountPath: /host-kubernetes + name: host-kubeconfig + readOnly: true + - mountPath: /var/run/dbus/ + name: host-var-run-dbus + readOnly: true + - mountPath: /var/log/ovn-kubernetes/ + name: host-var-log-ovnkube + # We mount our socket here + - mountPath: /var/run/ovn-kubernetes + name: host-var-run-ovn-kubernetes + # CNI related mounts which we take over + - mountPath: /opt/cni/bin + name: host-opt-cni-bin + - mountPath: /etc/cni/net.d + name: host-etc-cni-netd + - mountPath: /var/run/netns + name: host-netns + mountPropagation: Bidirectional + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + - mountPath: /var/run/ovn/ + name: host-var-run-ovs + - mountPath: /ovn-cert + name: host-ovn-cert + readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + - mountPath: /etc/openvswitch/ + name: host-etc-ovs + readOnly: true + - mountPath: /etc/ovn/ + name: host-var-lib-ovs + readOnly: true + - mountPath: /run/systemd/private + name: run-systemd + subPath: private + readOnly: true + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_EGRESSSERVICE_ENABLE + value: {{ default "" .Values.global.enableEgressService | quote }} + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_LOGLEVEL + value: {{ default 4 .Values.ovnkubeNodeLogLevel | quote }} + - name: OVNKUBE_LOGFILE_MAXSIZE + value: {{ default 100 .Values.logFileMaxSize | quote }} + - name: OVNKUBE_LOGFILE_MAXBACKUPS + value: {{ default 5 .Values.logFileMaxBackups | quote }} + - name: OVNKUBE_LOGFILE_MAXAGE + value: {{ default 5 .Values.logFileMaxAge | quote }} + - name: OVNKUBE_LIBOVSDB_CLIENT_LOGFILE + value: {{ default "" .Values.libovsdbClientLogFile | quote }} + - name: OVNKUBE_CONFIG_DURATION_ENABLE + value: {{ default "" .Values.global.enableConfigDuration | quote }} + - name: OVNKUBE_METRICS_SCALE_ENABLE + value: {{ default "" .Values.global.enableMetricsScale | quote }} + - name: OVN_NET_CIDR + value: {{ default "" .Values.global.dpuHostClusterNetworkCIDR | quote }} + - name: OVN_SVC_CIDR + value: {{ default "" .Values.global.dpuHostClusterServiceCIDR | quote }} + - name: K8S_APISERVER + value: {{ default "" .Values.global.dpuHostClusterK8sAPIServer | quote }} + - name: K8S_TOKEN + value: {{ default "" .Values.global.dpuHostClusterK8sToken | quote }} + - name: K8S_CACERT_DATA + value: {{ default "" .Values.global.dpuHostClusterK8sCACertData | quote }} + - name: K8S_TOKEN_FILE + value: {{ default "" .Values.global.dpuHostClusterK8sTokenFile | quote }} + - name: K8S_CACERT + value: {{ default "" .Values.global.dpuHostClusterK8sCACert | quote }} + - name: OVN_MTU + value: {{ default "" .Values.global.mtu | quote }} + - name: OVN_GATEWAY_MODE + value: {{ default "shared" .Values.global.gatewayMode }} + - name: OVN_GATEWAY_OPTS + value: {{ default "" .Values.global.gatewayOpts | quote }} + - name: 
OVN_HYBRID_OVERLAY_ENABLE + value: {{ default "" .Values.global.enableHybridOverlay | quote }} + - name: OVN_ADMIN_NETWORK_POLICY_ENABLE + value: {{ default "" .Values.global.enableAdminNetworkPolicy | quote }} + - name: OVN_EGRESSIP_ENABLE + value: {{ default "" .Values.global.enableEgressIp | quote }} + - name: OVN_EGRESSIP_HEALTHCHECK_PORT + value: {{ default "" .Values.global.egressIpHealthCheckPort | quote }} + - name: OVN_EGRESSFIREWALL_ENABLE + value: {{ default "" .Values.global.enableEgressFirewall | quote }} + - name: OVN_EGRESSQOS_ENABLE + value: {{ default "" .Values.global.enableEgressQos | quote }} + - name: OVN_HYBRID_OVERLAY_NET_CIDR + value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }} + - name: OVN_DISABLE_SNAT_MULTIPLE_GWS + value: {{ default "" .Values.global.disableSnatMultipleGws | quote }} + - name: OVN_DISABLE_FORWARDING + value: {{ default "" .Values.global.disableForwarding | quote }} + - name: OVN_ENCAP_PORT + value: {{ default 6081 .Values.global.encapPort | quote }} + - name: OVN_DISABLE_PKT_MTU_CHECK + value: {{ default "" .Values.global.disablePacketMtuCheck | quote }} + - name: OVN_NETFLOW_TARGETS + value: {{ default "" .Values.global.netFlowTargets | quote }} + - name: OVN_SFLOW_TARGETS + value: {{ default "" .Values.global.sflowTargets | quote }} + - name: OVN_IPFIX_TARGETS + value: {{ default "" .Values.global.ipfixTargets | quote }} + - name: OVN_IPFIX_SAMPLING + value: {{ default "" .Values.global.ipfixSampling | quote }} + - name: OVN_IPFIX_CACHE_MAX_FLOWS + value: {{ default "" .Values.global.ipfixCacheMaxFlows | quote }} + - name: OVN_IPFIX_CACHE_ACTIVE_TIMEOUT + value: {{ default "" .Values.global.ipfixCacheActiveTimeout | quote }} + - name: OVN_V4_JOIN_SUBNET + value: {{ default "" .Values.global.v4JoinSubnet | quote }} + - name: OVN_V6_JOIN_SUBNET + value: {{ default "" .Values.global.v6JoinSubnet | quote }} + - name: OVN_V4_MASQUERADE_SUBNET + value: {{ default "" .Values.global.v4MasqueradeSubnet | quote }} + - name: OVN_V6_MASQUERADE_SUBNET + value: {{ default "" .Values.global.v6MasqueradeSubnet | quote }} + - name: OVN_MULTICAST_ENABLE + value: {{ default "" .Values.global.enableMulticast | quote }} + - name: OVN_UNPRIVILEGED_MODE + value: {{ include "isUnprivilegedMode" . | quote }} + - name: OVN_EX_GW_NETWORK_INTERFACE + value: {{ default "" .Values.global.extGatewayNetworkInterface | quote }} + - name: OVN_SSL_ENABLE + value: {{ include "isSslEnabled" . 
| quote }} + - name: OVN_REMOTE_PROBE_INTERVAL + value: {{ default 100000 .Values.global.remoteProbeInterval | quote }} + - name: OVN_MONITOR_ALL + value: {{ hasKey .Values.global "monitorAll" | ternary .Values.global.monitorAll true | quote }} + - name: OVN_OFCTRL_WAIT_BEFORE_CLEAR + value: {{ default "" .Values.global.ofctrlWaitBeforeClear | quote }} + - name: OVN_ENABLE_LFLOW_CACHE + value: {{ hasKey .Values.global "enableLFlowCache" | ternary .Values.global.enableLFlowCache true | quote }} + - name: OVN_LFLOW_CACHE_LIMIT + value: {{ default "" .Values.global.lFlowCacheLimit | quote }} + - name: OVN_LFLOW_CACHE_LIMIT_KB + value: {{ default "" .Values.global.lFlowCacheLimitKb | quote }} + - name: OVN_MULTI_NETWORK_ENABLE + value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} + - name: OVN_NETWORK_SEGMENTATION_ENABLE + value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_NETWORK_CONNECT_ENABLE + value: {{ default "" .Values.global.enableNetworkConnect | quote }} + - name: OVN_PRE_CONF_UDN_ADDR_ENABLE + value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} + - name: OVN_ADVERTISED_UDN_ISOLATION_MODE + value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_EMPTY_LB_EVENTS + value: {{ default "" .Values.global.emptyLbEvents | quote }} + - name: OVN_ACL_LOGGING_RATE_LIMIT + value: {{ default 20 .Values.global.aclLoggingRateLimit | quote }} + - name: OVN_NORTH + value: "local" + - name: OVN_SOUTH + value: "local" + - name: OVN_ENABLE_INTERCONNECT + value: {{ hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false | quote }} + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: {{ hasKey .Values.global "enableMultiExternalGateway" | ternary .Values.global.enableMultiExternalGateway false | quote }} + - name: OVN_ENABLE_OVNKUBE_IDENTITY + value: {{ hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity false | quote }} + - name: OVN_ENABLE_SVC_TEMPLATE_SUPPORT + value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate false | quote }} + - name: OVN_ENABLE_DNSNAMERESOLVER + value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_OBSERV_ENABLE + value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }} + - name: OVN_NETWORK_QOS_ENABLE + value: {{ hasKey .Values.global "enableNetworkQos" | ternary .Values.global.enableNetworkQos false | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} + - name: OVN_KUBERNETES_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + readinessProbe: + httpGet: + path: /metrics + port: {{ .Values.metricsPort }} + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 5 + periodSeconds: 30 + # end of ovnkube-controller container + # ovn-controller + - name: ovn-controller + image: {{ include "getDPUImage" . 
}} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "ovn-controller"] + securityContext: + runAsUser: 0 + capabilities: + add: ["SYS_NICE"] + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/run/dbus/ + name: host-var-run-dbus + readOnly: true + - mountPath: /var/log/openvswitch/ + name: host-var-log-ovs + - mountPath: /var/log/ovn/ + name: host-var-log-ovs + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + - mountPath: /var/run/ovn/ + name: host-var-run-ovs + - mountPath: /ovn-cert + name: host-ovn-cert + readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_LOGLEVEL_CONTROLLER + value: {{ default "-vconsole:info" .Values.ovnControllerLogLevel | quote }} + - name: OVN_SSL_ENABLE + value: {{ default "" .Values.global.enableSsl | quote }} + - name: OVN_NORTH + value: "local" + - name: OVN_SOUTH + value: "local" + readinessProbe: + exec: + command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"] + initialDelaySeconds: 30 + timeoutSeconds: 30 + periodSeconds: 60 + # ovs-metrics-exporter - v3 + - name: ovs-metrics-exporter + image: {{ include "getDPUImage" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.dpuImage.pullPolicy }} + command: ["/root/ovnkube.sh", "ovs-metrics"] + securityContext: + runAsUser: 0 + capabilities: + add: ["NET_ADMIN"] + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/run/dbus/ + name: host-var-run-dbus + readOnly: true + - mountPath: /var/log/openvswitch/ + name: host-var-log-ovs + - mountPath: /var/run/openvswitch/ + name: host-var-run-ovs + readOnly: true + resources: + requests: + cpu: 100m + memory: 300Mi + env: + - name: OVN_DAEMONSET_VERSION + value: "1.2.0" + - name: OVNKUBE_NODE_MODE + value: "dpu" + - name: OVN_NORTH + value: "local" + - name: OVN_SOUTH + value: "local" + # end of container + nodeSelector: + kubernetes.io/os: "linux" + k8s.ovn.org/dpu: "" + volumes: + # Common volumes + - name: host-var-run-dbus + hostPath: + path: /var/run/dbus + - name: host-kubelet + hostPath: + path: /var/lib/kubelet + - name: host-kubeconfig + hostPath: + path: /etc/kubernetes/ + - name: host-var-log-ovnkube + hostPath: + path: /var/log/ovn-kubernetes + - name: host-var-run-ovn-kubernetes + hostPath: + path: /var/run/ovn-kubernetes + - name: host-opt-cni-bin + hostPath: + path: /opt/cni/bin + - name: host-etc-cni-netd + hostPath: + path: /etc/cni/net.d + - name: host-slash + hostPath: + path: / + - name: host-netns + hostPath: + path: /var/run/netns + - name: host-var-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-var-run-ovs + hostPath: + path: /var/run/openvswitch + - name: host-ovn-cert + hostPath: + path: /etc/ovn + type: DirectoryOrCreate + - name: host-etc-ovs + hostPath: + path: /etc/openvswitch + - name: host-var-lib-ovs + hostPath: + path: /var/lib/openvswitch + - name: run-systemd + hostPath: + path: /run/systemd + - name: ovnkube-config + configMap: + name: ovnkube-config + tolerations: + - operator: "Exists" diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml new file mode 100644 index 0000000000..b7d2004c60 --- /dev/null +++ 
b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone-dpu/values.yaml @@ -0,0 +1,12 @@ +nbLogLevel: "-vconsole:info -vfile:info" +sbLogLevel: "-vconsole:info -vfile:info" +northdLogLevel: "-vconsole:info -vfile:info" +northdBackoffInterval: "0" +ovnkubeNodeLogLevel: 4 +ovnControllerLogLevel: "-vconsole:info" +logFileMaxSize: 100 +logFileMaxBackups: 5 +logFileMaxAge: 5 +libovsdbClientLogFile: "" +# -- TCP port serving metrics +metricsPort: 9476 diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index 001213d48a..c2503c0d1d 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -64,6 +64,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -126,6 +129,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -191,6 +197,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -271,6 +280,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /etc/openvswitch/ name: host-etc-ovs readOnly: true @@ -419,6 +431,10 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE @@ -429,6 +445,8 @@ spec: value: {{ default "" .Values.global.dynamicUDNGracePeriod | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVNKUBE_NODE_MGMT_PORT_NETDEV value: {{ default "" .Values.global.nodeMgmtPortNetdev | quote }} - name: OVN_EMPTY_LB_EVENTS @@ -488,6 +506,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -562,6 +583,8 @@ spec: - matchExpressions: - key: k8s.ovn.org/dpu-host operator: DoesNotExist + - key: k8s.ovn.org/dpu + operator: DoesNotExist volumes: # Common volumes - name: host-var-run-dbus @@ -604,6 +627,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config - name: host-etc-ovs hostPath: path: /etc/openvswitch diff --git 
a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index 5dd4b4c231..bd03a2518c 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -69,6 +69,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -131,6 +134,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true - mountPath: /var/run/ovn/ name: host-var-run-ovs - mountPath: /var/run/openvswitch/ @@ -196,6 +202,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -249,6 +258,9 @@ spec: - mountPath: /ovn-cert name: host-ovn-cert readOnly: true + - mountPath: /run/ovnkube-config + name: ovnkube-config + readOnly: true resources: requests: cpu: 100m @@ -317,12 +329,18 @@ spec: value: {{ hasKey .Values.global "enableMultiNetwork" | ternary .Values.global.enableMultiNetwork false | quote }} - name: OVN_NETWORK_SEGMENTATION_ENABLE value: {{ default "" .Values.global.enableNetworkSegmentation | quote }} + - name: OVN_ROUTE_ADVERTISEMENTS_ENABLE + value: {{ hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false | quote }} + - name: OVN_EVPN_ENABLE + value: {{ hasKey .Values.global "enableEVPN" | ternary .Values.global.enableEVPN false | quote }} - name: OVN_NETWORK_CONNECT_ENABLE value: {{ default "" .Values.global.enableNetworkConnect | quote }} - name: OVN_PRE_CONF_UDN_ADDR_ENABLE value: {{ default "" .Values.global.enablePreconfiguredUDNAddresses | quote }} - name: OVN_ADVERTISED_UDN_ISOLATION_MODE value: {{ default "strict" .Values.global.advertisedUDNIsolationMode | quote }} + - name: OVN_NO_OVERLAY_ENABLE + value: {{ default "false" .Values.global.enableNoOverlay | quote }} - name: OVN_HYBRID_OVERLAY_NET_CIDR value: {{ default "" .Values.global.hybridOverlayNetCidr | quote }} - name: OVN_DISABLE_SNAT_MULTIPLE_GWS @@ -409,6 +427,9 @@ spec: hostPath: path: /etc/ovn type: DirectoryOrCreate + - name: ovnkube-config + configMap: + name: ovnkube-config - name: host-var-lib-ovs hostPath: path: /var/lib/openvswitch diff --git a/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml b/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml index 1f33d980d7..18855d51e6 100644 --- a/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml +++ b/helm/ovn-kubernetes/charts/ovs-node/templates/ovs-node.yaml @@ -84,9 +84,6 @@ spec: requests: cpu: 100m memory: 300Mi - limits: - cpu: 500m - memory: 500Mi env: - name: OVN_DAEMONSET_VERSION value: "1.2.0" diff --git a/helm/ovn-kubernetes/templates/ovn-setup.yaml b/helm/ovn-kubernetes/templates/ovn-setup.yaml index e9a5ef8981..b76ebf0929 100644 --- a/helm/ovn-kubernetes/templates/ovn-setup.yaml +++ b/helm/ovn-kubernetes/templates/ovn-setup.yaml @@ -50,6 +50,26 @@ data: mtu: {{ .Values.mtu | default 1500 | quote }} host_network_namespace: {{ $hostNetworkNamespace }} +--- +# ovnkube-config ConfigMap +# +# Configuration for ovnkube binaries +kind: ConfigMap +apiVersion: v1 +metadata: + name: 
ovnkube-config + namespace: ovn-kubernetes +data: + ovnkube.conf: | +{{- if .Values.global.enableNoOverlay }} + [default] + transport = no-overlay + + [no-overlay] + outbound-snat = disabled + routing = unmanaged +{{- end }} + {{- if or .Values.global.skipCallToK8s (eq (include "needNamespace" $hostNetworkNamespace) "true") }} --- # ovn-host-network-namespace.yaml @@ -64,6 +84,23 @@ metadata: name: {{ $hostNetworkNamespace }} {{- end }} +{{- if and (eq (hasKey .Values.global "enableRouteAdvertisements" | ternary .Values.global.enableRouteAdvertisements false) true) (eq (hasKey .Values.global "advertiseDefaultNetwork" | ternary .Values.global.advertiseDefaultNetwork false) true) }} +--- +apiVersion: k8s.ovn.org/v1 +kind: RouteAdvertisements +metadata: + name: default +spec: + networkSelectors: + - networkSelectionType: DefaultNetwork + nodeSelector: {} + frrConfigurationSelector: + matchLabels: + name: receive-all + advertisements: + - "PodNetwork" +{{- end }} + {{- if (and .Values.global.dockerConfigSecret .Values.global.dockerConfigSecret.create) }} --- apiVersion: v1 diff --git a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml index cf111e6482..a2bec63d7e 100644 --- a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml +++ b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml @@ -162,6 +162,7 @@ rules: - egressfirewalls/status - adminpolicybasedexternalroutes/status - egressqoses/status + - routeadvertisements/status - networkqoses/status verbs: [ "patch", "update" ] - apiGroups: ["policy.networking.k8s.io"] @@ -184,6 +185,7 @@ rules: - adminpolicybasedexternalroutes - userdefinednetworks - clusteruserdefinednetworks + - routeadvertisements - networkqoses - clusternetworkconnects verbs: [ "get", "list", "watch" ] diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index 77f1369af8..d3b1c16755 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -76,6 +76,16 @@ global: enableMultiNetwork: false # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes enableNetworkSegmentation: false + # -- Configure to use route advertisements feature with ovn-kubernetes + enableRouteAdvertisements: false + # -- Configure to use EVPN feature with ovn-kubernetes + enableEVPN: false + # -- Advertise default network on all nodes with a default RouteAdvertisements configuration + advertiseDefaultNetwork: false + # -- Pod network isolation between advertised UDN networks. 
(strict or loose)
+  advertisedUDNIsolationMode: "strict"
+  # -- Configure to enable no-overlay mode for the default network
+  enableNoOverlay: false
   # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes
   enablePreconfiguredUDNAddresses: false
   # -- Configure to enable IPsec
diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml
index 34bd024435..1ed4e30a2d 100644
--- a/helm/ovn-kubernetes/values-no-ic.yaml
+++ b/helm/ovn-kubernetes/values-no-ic.yaml
@@ -8,6 +8,7 @@ tags:
   ovnkube-node-dpu: false
   ovnkube-node-dpu-host: false
   ovnkube-single-node-zone: false
+  ovnkube-single-node-zone-dpu: false
   ovnkube-zone-controller: false

 # -- Endpoint of Kubernetes api server
@@ -70,6 +71,16 @@ global:
   enableMultiNetwork: false
   # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes
   enableNetworkSegmentation: false
+  # -- Configure to use route advertisements feature with ovn-kubernetes
+  enableRouteAdvertisements: false
+  # -- Configure to use EVPN feature with ovn-kubernetes
+  enableEVPN: false
+  # -- Advertise default network on all nodes with a default RouteAdvertisements configuration
+  advertiseDefaultNetwork: false
+  # -- Pod network isolation between advertised UDN networks. (strict or loose)
+  advertisedUDNIsolationMode: "strict"
+  # -- Configure to enable no-overlay mode for the default network
+  enableNoOverlay: false
   # -- Configure to enable IPsec
   enableIpsec: false
   # -- Use SSL transport to NB/SB db and northd
@@ -183,4 +194,3 @@ monitoring:
   enableServiceMonitor: false
   # -- deploy PrometheusRules for specific metric collection using the Prometheus Operator
   enablePrometheusRule: false
-
diff --git a/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml b/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml
new file mode 100644
index 0000000000..2105d1302b
--- /dev/null
+++ b/helm/ovn-kubernetes/values-single-node-zone-dpu.yaml
@@ -0,0 +1,176 @@
+# Values for ovn-kubernetes with single-node zone interconnect for DPU cluster
+# Requires: ovnkube-single-node-zone-dpu only
+
+# -- The following subcharts should be disabled
+tags:
+  ovs-node: false
+  ovn-ipsec: false
+  ovnkube-db: false
+  ovnkube-db-raft: false
+  ovnkube-master: false
+  ovnkube-node: false
+  ovnkube-control-plane: false
+  ovnkube-node-dpu-host: false
+  ovnkube-node-dpu: false
+  ovnkube-single-node-zone: false
+  ovnkube-zone-controller: false
+
+# -- Whether or not to call the `lookup` Helm function; set it to `true` if you want to run `helm dry-run/template/lint`
+skipCallToK8s: false
+
+global:
+  # -- The interface on nodes that will be used for external gateway network traffic
+  extGatewayNetworkInterface: ""
+  # -- GENEVE UDP port (default 6081)
+  encapPort: 6081
+  # -- The gateway mode (shared or local); if not given, gateway functionality is disabled
+  gatewayMode: shared
+  # -- Optional extra gateway options
+  gatewayOpts: ""
+  # -- This allows ovnkube-node to run without the SYS_ADMIN capability by performing interface setup in the CNI plugin
+  unprivilegedMode: false
+  # -- The v4 join subnet used for assigning join switch IPv4 addresses
+  v4JoinSubnet: "100.64.0.0/16"
+  # -- The v4 masquerade subnet used for assigning masquerade IPv4 addresses
+  v4MasqueradeSubnet: "169.254.0.0/17"
+  # -- The v4 subnet for transit switches and routers
+  v4TransitSubnet: "100.88.0.0/16"
+  # -- The v6 join subnet used for assigning join switch IPv6 addresses
+  v6JoinSubnet: "fd98::/64"
+  # -- The v6 masquerade subnet used for assigning masquerade IPv6 addresses
+  v6MasqueradeSubnet: "fd69::/112"
+  # -- The v6 subnet for transit switches and routers
+  v6TransitSubnet: "fd97::/64"
+  # -- Whether or not to enable the ovnkube identity webhook
+  enableOvnKubeIdentity: false
+  # -- Whether or not to enable hybrid overlay functionality
+  enableHybridOverlay: ""
+  # -- A comma-separated set of IP subnets and the associated hostsubnetlengths (e.g., \"10.128.0.0/14/23,10.0.0.0/14/23\") to use with the extended hybrid network
+  hybridOverlayNetCidr: ""
+  # -- Whether or not to use Admin Network Policy CRD feature with ovn-kubernetes
+  enableAdminNetworkPolicy: false
+  # -- Configure to use EgressIP CRD feature with ovn-kubernetes
+  enableEgressIp: false
+  # -- Configure EgressIP node reachability using gRPC on this TCP port
+  egressIpHealthCheckPort: 9107
+  # -- Configure to use EgressService CRD feature with ovn-kubernetes
+  enableEgressService: false
+  # -- Configure to use EgressFirewall CRD feature with ovn-kubernetes
+  enableEgressFirewall: false
+  # -- Configure to use EgressQoS CRD feature with ovn-kubernetes
+  enableEgressQos: false
+  # -- Enables network QoS support from/to pods
+  enableNetworkQos: false
+  # -- Enables multicast support between the pods within the same namespace
+  enableMulticast: ""
+  # -- Configure to use multiple NetworkAttachmentDefinition CRD feature with ovn-kubernetes
+  enableMultiNetwork: false
+  # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes
+  enableNetworkSegmentation: false
+  # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes
+  enablePreconfiguredUDNAddresses: false
+  # -- Configure to enable IPsec
+  enableIpsec: false
+  # -- Use SSL transport to NB/SB db and northd
+  enableSsl: false
+  # -- Configure to enable interconnecting multiple zones
+  # @default -- true
+  enableInterconnect: true
+  # -- Configure to use AdminPolicyBasedExternalRoute CRD feature with ovn-kubernetes
+  enableMultiExternalGateway: false
+  # -- Configure to use stateless network policy feature with ovn-kubernetes
+  enableStatelessNetworkPolicy: false
+  # -- Configure to use service template feature with ovn-kubernetes
+  enableSvcTemplate: false
+  # -- Enables metrics related to scaling
+  enableMetricsScale: ""
+  # -- Enables monitoring OVN-Kubernetes master and OVN configuration duration
+  enableConfigDuration: ""
+  # -- Indicates if ovn-controller should enable/disable the logical flow in-memory cache when processing Southbound database logical flow changes
+  # @default -- true
+  enableLFlowCache: true
+  # -- Maximum number of logical flow cache entries ovn-controller may create when the logical flow cache is enabled
+  # @default -- unlimited
+  lFlowCacheLimit: ""
+  # -- Maximum size of the logical flow cache (in KB) ovn-controller may create when the logical flow cache is enabled
+  lFlowCacheLimitKb: ""
+  # -- Configure to use the IPAMClaims CRD feature with ovn-kubernetes, thus granting persistent IPs across restarts / migration for KubeVirt VMs
+  enablePersistentIPs: false
+  # -- Configure to use DNSNameResolver feature with ovn-kubernetes
+  enableDNSNameResolver: false
+  # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws
+  disableSnatMultipleGws: ""
+  # -- Controls if forwarding is allowed on OVNK controlled interfaces
+  # @default -- false
+  disableForwarding: ""
+  # -- Disables adding openflow flows to check packets too large to be delivered to OVN due to pod MTU being lower than NIC MTU
+  disablePacketMtuCheck: ""
+  # -- The largest number of messages per second that get logged before being dropped
+  # @default -- 20
+  aclLoggingRateLimit: 20
+  # -- If set, then load balancers do not get deleted when all backends are removed
+  emptyLbEvents: ""
+  # -- Port of north bound ovsdb
+  nbPort: 6641
+  # -- Port of south bound ovsdb
+  sbPort: 6642
+  # -- A comma-separated set of NetFlow collectors to export flow data
+  netFlowTargets: ""
+  # -- A comma-separated set of SFlow collectors to export flow data
+  sflowTargets: ""
+  # -- A comma-separated set of IPFIX collectors to export flow data
+  ipfixTargets: ""
+  # -- Rate at which packets should be sampled and sent to each target collector
+  # @default -- 400
+  ipfixSampling: ""
+  # -- Maximum number of IPFIX flow records that can be cached at a time
+  # @default -- 0, meaning disabled
+  ipfixCacheMaxFlows: ""
+  # -- Maximum period in seconds for which an IPFIX flow record is cached and aggregated before being sent
+  # @default -- 60
+  ipfixCacheActiveTimeout: ""
+  # -- OVN remote probe interval in ms
+  # @default -- 100000
+  remoteProbeInterval: 100000
+  # -- Enable monitoring all data from SB DB instead of conditionally monitoring the data relevant to this node only
+  # @default -- true
+  monitorAll: true
+  # -- ovn-controller wait time in ms before clearing OpenFlow rules during startup
+  # @default -- 0
+  ofctrlWaitBeforeClear: "0"
+  # -- Container images
+  # -- Image for DPUs
+  dpuImage:
+    # -- Image repository for ovn-kubernetes components
+    repository: ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-ubuntu
+    # -- Specify image tag to run
+    tag: master
+    # -- Image pull policy
+    pullPolicy: IfNotPresent
+  # -- The name of the secret used for pulling the image. Use only if needed
+  imagePullSecretName: ""
+  # -- Endpoint of DPU Host cluster's Kubernetes api server
+  dpuHostClusterK8sAPIServer: https://172.25.0.2:6443
+  # -- DPU Host cluster's Kubernetes Access Token
+  dpuHostClusterK8sToken: ""
+  # -- DPU Host cluster's Kubernetes Access Certs Data
+  dpuHostClusterK8sCACertData: ""
+  # -- DPU Host cluster's Kubernetes Access Token File
+  dpuHostClusterK8sTokenFile: ""
+  # -- DPU Host cluster's Kubernetes Access Certs File
+  dpuHostClusterK8sCACert: ""
+  # -- DPU Host cluster's Network CIDR
+  dpuHostClusterNetworkCIDR: 10.244.0.0/16/24
+  # -- DPU Host cluster's Service CIDR
+  dpuHostClusterServiceCIDR: 10.96.0.0/16
+  # -- MTU of network interface in a Kubernetes pod
+  mtu: 1400
+
+# -- Prometheus monitoring related fields
+monitoring:
+  # -- Specify the labels for serviceMonitors to be selected for target discovery.
+  # The Prometheus operator defines which namespaces and which serviceMonitors
+  # within those namespaces must be selected for target discovery. The fields
+  # defined below help in defining that.
+ commonServiceMonitorSelectorLabels: + release: kube-prometheus-stack diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index 6356006c8c..b9f4f2caf2 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -9,6 +9,7 @@ tags: ovnkube-master: false ovnkube-node: false ovnkube-node-dpu: false + ovnkube-single-node-zone-dpu: false ovnkube-node-dpu-host: false ovnkube-zone-controller: false @@ -76,6 +77,16 @@ global: enableMultiNetwork: false # -- Configure to use user defined networks (UDN) feature with ovn-kubernetes enableNetworkSegmentation: false + # -- Configure to use route advertisements feature with ovn-kubernetes + enableRouteAdvertisements: false + # -- Configure to use EVPN feature with ovn-kubernetes + enableEVPN: false + # -- Advertise default network on all nodes with a default RouteAdvertisements configuration + advertiseDefaultNetwork: false + # -- Pod network isolation between advertised UDN networks. (strict or loose) + advertisedUDNIsolationMode: "strict" + # -- Configure to enable no-overlay mode for the default network + enableNoOverlay: false # -- Configure to enable workloads with preconfigured network connect to user defined networks (UDN) with ovn-kubernetes enablePreconfiguredUDNAddresses: false # -- Configure to enable IPsec @@ -169,4 +180,3 @@ monitoring: enableServiceMonitor: false # -- deploy PrometheusRules for specific metric collection using the Prometheus Operator enablePrometheusRule: false - diff --git a/openshift/cmd/ovn-kubernetes-tests-ext/main.go b/openshift/cmd/ovn-kubernetes-tests-ext/main.go index 56258e65ab..62e1024c60 100644 --- a/openshift/cmd/ovn-kubernetes-tests-ext/main.go +++ b/openshift/cmd/ovn-kubernetes-tests-ext/main.go @@ -4,6 +4,7 @@ import ( "os" "strings" + "github.com/ovn-org/ovn-kubernetes/openshift/test" "github.com/ovn-org/ovn-kubernetes/openshift/test/generated" // import ovn-kubernetes tests _ "github.com/ovn-org/ovn-kubernetes/test/e2e" @@ -26,6 +27,14 @@ import ( _ "k8s.io/component-base/logs/testinit" ) +func loadBlockingTests() map[string]bool { + blockingTests := make(map[string]bool) + for _, testName := range test.BlockingTests { + blockingTests[testName] = true + } + return blockingTests +} + func main() { // Create our registry of openshift-tests extensions extensionRegistry := extension.NewRegistry() @@ -61,6 +70,8 @@ func main() { } }) + blockingTests := loadBlockingTests() + specs.Walk(func(spec *extensiontests.ExtensionTestSpec) { for _, label := range getTestExtensionLabels() { spec.Labels.Insert(label) @@ -76,6 +87,10 @@ func main() { spec.Name += " " + annotations } spec.Name = generatePrependedLabelsStr(spec.Labels) + " " + spec.Name // prepend ginkgo labels to test name + + if !blockingTests[spec.Name] { + spec.Lifecycle = extensiontests.LifecycleInforming + } }) specs = specs.Select(func(spec *extensiontests.ExtensionTestSpec) bool { diff --git a/openshift/test/blocking_tests.go b/openshift/test/blocking_tests.go new file mode 100644 index 0000000000..1e45e1164a --- /dev/null +++ b/openshift/test/blocking_tests.go @@ -0,0 +1,63 @@ +package test + +// BlockingTests lists tests that are considered stable and should block CI jobs if they fail. +// Tests NOT in this list or explicitly "Disabled" in annotations will be marked as "informing" +// - they run but failures don't fail the job. +// +// To graduate a test from informing to blocking: +// 1. 
Add the full test name to this slice (with proper quotes and comma) +// 2. Rebuild: ./hack/build-tests-ext.sh +// 3. Verify: ./bin/ovn-kubernetes-tests-ext list tests | jq -r '.[] | select(.name == "test name here") | .lifecycle' +// +// Used by: openshift/cmd/ovn-kubernetes-tests-ext/main.go +var BlockingTests = []string{ + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork creates a networkStatus Annotation with UDN interface L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork creates a networkStatus Annotation with UDN interface L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using ClusterUserDefinedNetwork creates a networkStatus Annotation with UDN interface L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using NetworkAttachmentDefinitions can perform east/west traffic between nodes two pods connected over a L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using NetworkAttachmentDefinitions can perform east/west traffic between nodes two pods connected over a L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using NetworkAttachmentDefinitions can perform east/west traffic between nodes two pods connected over a L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using NetworkAttachmentDefinitions creates a networkStatus Annotation with UDN interface L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using NetworkAttachmentDefinitions creates a networkStatus Annotation with UDN interface L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a 
user defined primary network created using NetworkAttachmentDefinitions creates a networkStatus Annotation with UDN interface L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork can perform east/west traffic between nodes two pods connected over a L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork creates a networkStatus Annotation with UDN interface L2 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork creates a networkStatus Annotation with UDN interface L2 primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network created using UserDefinedNetwork creates a networkStatus Annotation with UDN interface L3 primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation a user defined primary network doesn't cause network name conflict [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller pod connected to ClusterUserDefinedNetwork CR & managed NADs cannot be deleted when being used [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD according to spec in each target namespace and report active namespaces [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD in new created namespaces that apply to namespace-selector [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller when CR is deleted, should delete all managed NAD in each target namespace [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller when namespace-selector is mutated should create NAD in namespaces that apply to mutated namespace-selector [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation ClusterUserDefinedNetwork CRD Controller when namespace-selector is mutated should delete managed NAD in namespaces that no longer apply to 
namespace-selector [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Default network multus annotation ValidatingAdmissionPolicy protection should prevent adding, modifying and removing the default-network annotation on existing pods [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Default network multus annotation when added with static IP and MAC to a pod belonging to primary UDN should create the pod with the specified static IP and MAC address without persistent IPAM enabled [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Default network multus annotation when added with static IP and MAC to a pod belonging to primary UDN should create the pod with the specified static IP and MAC address with persistent IPAM [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Network Policies on a user defined primary network pods within namespace should be isolated when deny policy is present in L2 dualstack primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Network Policies on a user defined primary network pods within namespace should be isolated when deny policy is present in L2 dualstack primary UDN with custom network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Network Policies on a user defined primary network pods within namespace should be isolated when deny policy is present in L3 dualstack primary UDN [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN duplicate IP validation with primary UDN layer 2 pods should fail when creating second pod with duplicate static IP IPv4 duplicate [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN duplicate IP validation with primary UDN layer 2 pods should fail when creating second pod with duplicate static IP IPv6 duplicate [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN should respect network configuration Layer2 basic configuration [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN should respect network configuration Layer2 with custom subnets [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN should respect network configuration Layer2 with inverted gateway/management IPs [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv4 infrastructure subnets [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv4 
reserved subnets [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv6 infrastructure subnets [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv6 reserved subnets [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN with custom network, cluster-networked pods, NodePort service [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for L2 secondary network pod connected to UserDefinedNetwork cannot be deleted when being used [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for L2 secondary network should create NetworkAttachmentDefinition according to spec [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for L2 secondary network should delete NetworkAttachmentDefinition when UserDefinedNetwork is deleted [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for primary UDN without required namespace label should be able to create pod and it will attach to the cluster default network [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for primary UDN without required namespace label should not be able to update the namespace and add the UDN label [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation UserDefinedNetwork CRD Controller for primary UDN without required namespace label should not be able to update the namespace and remove the UDN label [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation when primary network exist, ClusterUserDefinedNetwork status should report not-ready [Suite:openshift/conformance/parallel]", + "[Feature:NetworkSegmentation][ovn-kubernetes-ote][sig-network] Network Segmentation when primary network exist, UserDefinedNetwork status should report not-ready [Suite:openshift/conformance/parallel]", +} diff --git a/openshift/test/generated/zz_generated.annotations.go b/openshift/test/generated/zz_generated.annotations.go index 3ec39fa006..4929f618f5 100644 --- a/openshift/test/generated/zz_generated.annotations.go +++ b/openshift/test/generated/zz_generated.annotations.go @@ -1421,6 +1421,8 @@ var AppendedAnnotations = map[string]string{ "Services does not use host masquerade address as source IP address when communicating externally": "[Disabled:Unimplemented]", + "Services of type NodePort should be able to preserve UDP traffic when server pod cycles for a NodePort service via a different node": 
"[Disabled:Unimplemented]", + "Services of type NodePort should handle IP fragments": "[Disabled:Unimplemented]", "Services of type NodePort should listen on each host addresses": "[Disabled:Unimplemented]", @@ -1551,6 +1553,10 @@ var AppendedAnnotations = map[string]string{ "e2e delete databases recovering from deleting db files while maintaining connectivity when deleting both db files on ovnkube-db-2": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Cluster Default Network Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Cluster Default Network Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Cluster Default Network Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1591,6 +1597,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Cluster Default Network of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1631,6 +1641,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv4 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1671,6 +1685,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv4 L3 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should fail if egressip-mark annotation is present during 
EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv6 L2 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", @@ -1711,6 +1729,10 @@ var AppendedAnnotations = map[string]string{ "e2e egress IP validation Network Segmentation: IPv6 L2 role primary of replies to egress IP packets that require fragmentation [LGW][IPv4]": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is being added by a regular user": "[Disabled:Unimplemented]", + + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should fail if egressip-mark annotation is present during EgressIP creation": "[Disabled:Unimplemented]", + "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should handle EIP reassignment correctly on namespace and pod label updates, and EIP object updates": "[Disabled:Unimplemented]", "e2e egress IP validation Network Segmentation: IPv6 L3 role primary Should re-assign egress IPs when node readiness / reachability goes down/up": "[Disabled:Unimplemented]", diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 87323cdb2b..be0313b305 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -1066,9 +1066,11 @@ var _ = ginkgo.Describe("test e2e pod connectivity to host addresses", func() { framework.Failf("Test requires >= 1 Ready nodes, but there are only %v nodes", len(nodes.Items)) } workerNodeName = nodes.Items[0].Name - // Add another IP address to the worker + // Add another IP address to the worker with preferred_lft 0 to mark it as deprecated. + // This prevents the IP from being selected as the node's primary gateway IP while still + // allowing the test to verify pod-to-host connectivity to non-node IPs. _, err = infraprovider.Get().ExecK8NodeCommand(workerNodeName, []string{"ip", "a", "add", - fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName()}) + fmt.Sprintf("%s/%s", targetIP, singleIPMask), "dev", deploymentconfig.Get().ExternalBridgeName(), "preferred_lft", "0"}) framework.ExpectNoError(err, "failed to add IP to %s", workerNodeName) }) @@ -1946,6 +1948,20 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { return fmt.Sprintf(collectorContainerTemplate, port) } + getCollectorArgs := func(protocol flowMonitoringProtocol, port uint16) []string { + args := []string{"-kafka=false"} + switch protocol { + case sflow: + // Disable other collectors to avoid non-deterministic startup ordering in logs. 
+ args = append(args, "-nf=false", "-nfl=false", "-sflow=true", fmt.Sprintf("-sflow.port=%d", port)) + case netflow_v5: + args = append(args, "-nf=false", "-sflow=false", "-nfl=true", fmt.Sprintf("-nfl.port=%d", port)) + case ipfix: + args = append(args, "-nfl=false", "-sflow=false", "-nf=true", fmt.Sprintf("-nf.port=%d", port)) + } + return args + } + keywordInLogs := map[flowMonitoringProtocol]string{ netflow_v5: "NETFLOW_V5", ipfix: "IPFIX", sflow: "SFLOW_5"} @@ -1966,7 +1982,7 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { primaryProviderNetwork, err := infraprovider.Get().PrimaryNetwork() framework.ExpectNoError(err, "failed to get primary network") collectorExternalContainer := infraapi.ExternalContainer{Name: getContainerName(collectorPort), Image: "cloudflare/goflow", - Network: primaryProviderNetwork, CmdArgs: []string{"-kafka=false"}, ExtPort: collectorPort} + Network: primaryProviderNetwork, CmdArgs: getCollectorArgs(protocol, collectorPort), ExtPort: collectorPort} collectorExternalContainer, err = providerCtx.CreateExternalContainer(collectorExternalContainer) if err != nil { framework.Failf("failed to start flow collector container %s: %v", getContainerName(collectorPort), err) @@ -1984,6 +2000,58 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() { setEnv := map[string]string{ovnEnvVar: addressAndPort} setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), setEnv) + ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app=ovnkube-node", + }) + if err != nil { + framework.Failf("could not get ovnkube-node pods: %v", err) + } + + if protocol == sflow { + ginkgo.By("Waiting for ovnkube-node to configure br-int sflow and setting sampling/polling for better signal") + for _, ovnKubeNodePod := range ovnKubeNodePods.Items { + var sFlowUUID string + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + getSFlowExecOptions := e2epod.ExecOptions{ + Command: []string{"ovs-vsctl", "--if-exists", "get", "bridge", "br-int", "sflow"}, + Namespace: ovnKubeNamespace, + PodName: ovnKubeNodePod.Name, + ContainerName: getNodeContainerName(), + CaptureStdout: true, + CaptureStderr: true, + } + rawUUID, stderr, execErr := e2epod.ExecWithOptions(f, getSFlowExecOptions) + if execErr != nil { + framework.Logf("waiting for sflow row on %s: query failed: %v, stderr: %s", + ovnKubeNodePod.Name, execErr, stderr) + return false, nil + } + rawUUID = strings.TrimSpace(strings.Trim(rawUUID, "\"")) + if rawUUID == "" || rawUUID == "[]" { + framework.Logf("waiting for sflow row on %s: br-int has no sflow row yet", ovnKubeNodePod.Name) + return false, nil + } + sFlowUUID = rawUUID + return true, nil + }) + framework.ExpectNoError(err, "timed out waiting for br-int sflow row on %s", ovnKubeNodePod.Name) + + setSFlowExecOptions := e2epod.ExecOptions{ + Command: []string{"ovs-vsctl", "--if-exists", "set", "sflow", sFlowUUID, "sampling=1", "polling=1"}, + Namespace: ovnKubeNamespace, + PodName: ovnKubeNodePod.Name, + ContainerName: getNodeContainerName(), + CaptureStdout: true, + CaptureStderr: true, + } + _, setStderr, setErr := e2epod.ExecWithOptions(f, setSFlowExecOptions) + if setErr != nil { + framework.Logf("skipping sflow sampling tuning on %s: failed to set sampling/polling for row %s: %v, stderr: %s", + ovnKubeNodePod.Name, sFlowUUID, setErr, setStderr) + } + } + } + 
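+			// With sampling=1 OVS samples every packet traversing br-int, and
+			// polling=1 exports interface counters every second, so sFlow records
+			// should reach the collector well within the retry window checked below.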
ginkgo.By(fmt.Sprintf("Checking that the collector container received %s data", protocolStr))
 			keyword := keywordInLogs[protocol]
 			collectorContainerLogsTest := func() wait.ConditionFunc {
@@ -1995,14 +2063,14 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() {
 				}
 				collectorContainerLogs = strings.TrimSuffix(collectorContainerLogs, "\n")
 				logLines := strings.Split(collectorContainerLogs, "\n")
-				lastLine := logLines[len(logLines)-1]
 				// check that flow monitoring traffic has been logged
-				if strings.Contains(lastLine, keyword) {
-					framework.Logf("Successfully found string %s in last log line of"+
-						" the collector: %s", keyword, lastLine)
-					return true, nil
+				for _, line := range logLines {
+					if strings.Contains(line, keyword) {
+						framework.Logf("Successfully found string %s in collector logs line: %s", keyword, line)
+						return true, nil
+					}
 				}
-				framework.Logf("%s not found in last log line: %s", keyword, lastLine)
+				framework.Logf("%s not found in collector logs", keyword)
 				return false, nil
 			}
 		}
@@ -2014,7 +2082,7 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() {
 
 		ginkgo.By(fmt.Sprintf("Unsetting %s variable in ovnkube-node daemonset", ovnEnvVar))
 		setUnsetTemplateContainerEnv(f.ClientSet, ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), nil, ovnEnvVar)
-		ovnKubeNodePods, err := f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{
+		ovnKubeNodePods, err = f.ClientSet.CoreV1().Pods(ovnKubeNamespace).List(context.TODO(), metav1.ListOptions{
 			LabelSelector: "app=ovnkube-node",
 		})
 		if err != nil {
@@ -2032,9 +2100,9 @@ var _ = ginkgo.Describe("e2e br-int flow monitoring export validation", func() {
 				CaptureStderr: true,
 			}
 
-			targets, stderr, _ := e2epod.ExecWithOptions(f, execOptions)
+			targets, stderr, execErr := e2epod.ExecWithOptions(f, execOptions)
 			framework.Logf("execOptions are %v", execOptions)
-			if err != nil {
+			if execErr != nil {
 				framework.Failf("could not lookup ovs %s targets: %v", protocolStr, stderr)
 			}
 			gomega.Expect(targets).To(gomega.BeEmpty())
diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go
index dc68b02a98..6e7d75f147 100644
--- a/test/e2e/egressip.go
+++ b/test/e2e/egressip.go
@@ -36,6 +36,7 @@ import (
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	"k8s.io/kubernetes/test/e2e/framework/pod"
 	e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output"
+	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	utilnet "k8s.io/utils/net"
 )
 
@@ -3443,6 +3444,130 @@ spec:
 		}
 	})
 
+	ginkgo.It("Should fail if egressip-mark annotation is present during EgressIP creation", func() {
+		// This check can be removed when https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5879 is addressed
+		if isHelmEnabled() {
+			e2eskipper.Skipf("Skipping this test for Helm environments as we don't create the required ValidatingAdmissionPolicy in a Helm environment")
+		}
+
+		ginkgo.By("1. Create an EgressIP object with one egress IP defined")
+		var egressIP1 net.IP
+		var err error
+		if utilnet.IsIPv6String(egress1Node.nodeIP) {
+			egressIP1, err = ipalloc.NewPrimaryIPv6()
+		} else {
+			egressIP1, err = ipalloc.NewPrimaryIPv4()
+		}
+		gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP")
+
+		var egressIPConfig = `apiVersion: k8s.ovn.org/v1
+kind: EgressIP
+metadata:
+  name: ` + egressIPName + `
+  annotations:
+    ` + util.EgressIPMarkAnnotation + `: "50000"
+spec:
+  egressIPs:
+  - ` + egressIP1.String() + `
+  namespaceSelector:
+    matchLabels:
+      name: ` + f.Namespace.Name + `
+`
+		if err := os.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil {
+			framework.Failf("Unable to write CRD config to disk: %v", err)
+		}
+		defer func() {
+			if err := os.Remove(egressIPYaml); err != nil {
+				framework.Logf("Unable to remove the CRD config from disk: %v", err)
+			}
+		}()
+
+		ginkgo.By("2. Create an EgressIP with k8s.ovn.org/egressip-mark annotation defined")
+		_, err = e2ekubectl.RunKubectl("default", "create", "-f", egressIPYaml)
+		gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark annotation is present during creation")
+		gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("EgressIP resources cannot be created with the \"k8s.ovn.org/egressip-mark\" annotation. This annotation is managed by the system.")))
+	})
+
+	ginkgo.It("Should fail if egressip-mark annotation is being added by a regular user", func() {
+		// This check can be removed when https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5879 is addressed
+		if isHelmEnabled() {
+			e2eskipper.Skipf("Skipping this test for Helm environments as we don't create the required ValidatingAdmissionPolicy in a Helm environment")
+		}
+
+		ginkgo.By("1. Add the \"k8s.ovn.org/egress-assignable\" label to egress1Node node")
+		egressNodeAvailabilityHandler := egressNodeAvailabilityHandlerViaLabel{f}
+		egressNodeAvailabilityHandler.Enable(egress1Node.name)
+		defer egressNodeAvailabilityHandler.Restore(egress1Node.name)
+
+		podNamespace := f.Namespace
+		labels := map[string]string{
+			"name": f.Namespace.Name,
+		}
+		updateNamespaceLabels(f, podNamespace, labels)
+
+		ginkgo.By("2. Create an EgressIP object with one egress IP defined")
+		var egressIP1 net.IP
+		var err error
+		if utilnet.IsIPv6String(egress1Node.nodeIP) {
+			egressIP1, err = ipalloc.NewPrimaryIPv6()
+		} else {
+			egressIP1, err = ipalloc.NewPrimaryIPv4()
+		}
+		gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "must allocate new Node IP")
+
+		var egressIPConfig = `apiVersion: k8s.ovn.org/v1
+kind: EgressIP
+metadata:
+  name: ` + egressIPName + `
+spec:
+  egressIPs:
+  - ` + egressIP1.String() + `
+  namespaceSelector:
+    matchLabels:
+      name: ` + f.Namespace.Name + `
+`
+		if err := os.WriteFile(egressIPYaml, []byte(egressIPConfig), 0644); err != nil {
+			framework.Failf("Unable to write CRD config to disk: %v", err)
+		}
+		defer func() {
+			if err := os.Remove(egressIPYaml); err != nil {
+				framework.Logf("Unable to remove the CRD config from disk: %v", err)
+			}
+		}()
+
+		framework.Logf("Create the EgressIP configuration")
+		e2ekubectl.RunKubectlOrDie("default", "create", "-f", egressIPYaml)
+
+		ginkgo.By("3. Check that the status is of length one and that it is assigned to egress1Node")
+		statuses := verifyEgressIPStatusLengthEquals(1, nil)
+		if statuses[0].Node != egress1Node.name {
+			framework.Failf("Step 3. 
Check that the status is of length one and that it is assigned to egress1Node, failed") + } + + ginkgo.By("4. Try updating k8s.ovn.org/egressip-mark annotation") + // Get the current annotation value to ensure we try to overwrite with a different value + annotationsJSON, err := e2ekubectl.RunKubectl("", "get", "egressip", egressIPName, "-o", "jsonpath={.metadata.annotations}") + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to get annotations") + var annotations map[string]string + err = json.Unmarshal([]byte(annotationsJSON), &annotations) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to unmarshal annotations JSON") + currentValue := annotations[util.EgressIPMarkAnnotation] + + newValue := 50000 + if currentValue == "50000" { + newValue = 50001 + } + + _, err = e2ekubectl.RunKubectl("", "annotate", "--overwrite", "egressip", egressIPName, fmt.Sprintf("%s=%d", util.EgressIPMarkAnnotation, newValue)) + gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark is being updated") + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("The \"k8s.ovn.org/egressip-mark\" annotation cannot be modified or removed once set. This annotation is managed by the system."))) + + ginkgo.By("5. Try removing k8s.ovn.org/egressip-mark annotation") + _, err = e2ekubectl.RunKubectl("", "annotate", "--overwrite", "egressip", egressIPName, fmt.Sprintf("%s-", util.EgressIPMarkAnnotation)) + gomega.Expect(err).To(gomega.HaveOccurred(), "Should fail if k8s.ovn.org/egressip-mark is being removed") + gomega.Expect(err).To(gomega.MatchError(gomega.ContainSubstring("The \"k8s.ovn.org/egressip-mark\" annotation cannot be modified or removed once set. This annotation is managed by the system."))) + }) + ginkgo.DescribeTable("[OVN network] multiple namespaces with different primary networks", func(otherNetworkAttachParms networkAttachmentConfigParams) { if !isNetworkSegmentationEnabled() { ginkgo.Skip("network segmentation is disabled") diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 58a25d9774..532a45fe5d 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -721,51 +721,45 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac return valueStr, nil } - getIPFamilyFlagForIPRoute2 := func(ipStr string) (string, error) { + getIPFamilyForIPRoute2 := func(ipStr string) (string, error) { ip := net.ParseIP(ipStr) if ip == nil { return "", fmt.Errorf("invalid IP address: %s", ipStr) } if utilnet.IsIPv6(ip) { - return "-6", nil + return "inet6", nil } - return "-4", nil + return "inet", nil } getInterfaceNameUsingIP := func(ip string) (string, error) { - ipFlag, err := getIPFamilyFlagForIPRoute2(ip) + ipFamily, err := getIPFamilyForIPRoute2(ip) if err != nil { - return "", fmt.Errorf("failed to get IP family flag for %s: %w", ip, err) + return "", fmt.Errorf("failed to get IP family for %s: %w", ip, err) } - allInfAddrBytes, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "-br", ipFlag, "a", "sh").CombinedOutput() + cmdArgs := []string{"exec", "-i", containerName, "ip", "-o", "-f", ipFamily, "addr", "show"} + allInfAddrBytes, err := exec.Command(containerengine.Get().String(), cmdArgs...).CombinedOutput() if err != nil { - return "", fmt.Errorf("failed to find interface with IP %s on container %s with command 'ip -br a sh': err %v, out: %s", ip, containerName, - err, 
allInfAddrBytes) + return "", fmt.Errorf("failed to find interface with IP %s on container %s with command %q: err %v, out: %s", ip, containerName, + strings.Join(cmdArgs[3:], " "), err, allInfAddrBytes) } - var ipLine string + var infName string for _, line := range strings.Split(string(allInfAddrBytes), "\n") { if strings.Contains(line, ip) { - ipLine = line + fields := strings.Fields(line) + if len(fields) < 2 { + return "", fmt.Errorf("failed to parse 'ip addr' output line %q", line) + } + infName = strings.TrimSuffix(fields[1], ":") + if strings.Contains(infName, "@") { + infName = strings.SplitN(infName, "@", 2)[0] + } break } } - if ipLine == "" { + if infName == "" { return "", fmt.Errorf("failed to find IP %q within 'ip a' command on container %q:\n\n%q", ip, containerName, string(allInfAddrBytes)) } - ipLineSplit := strings.Split(ipLine, " ") - if len(ipLine) == 0 { - return "", fmt.Errorf("failed to find interface name from 'ip a' output line %q", ipLine) - } - infNames := ipLineSplit[0] - splitChar := " " - if strings.Contains(infNames, "@") { - splitChar = "@" - } - infNamesSplit := strings.Split(infNames, splitChar) - if len(infNamesSplit) == 0 { - return "", fmt.Errorf("failed to extract inf name + veth name from %q splitting by %q", infNames, splitChar) - } - infName := infNamesSplit[0] // validate its an interface name on the Node with iproute2 out, err := exec.Command(containerengine.Get().String(), "exec", "-i", containerName, "ip", "link", "show", infName).CombinedOutput() if err != nil { @@ -805,7 +799,7 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac if ni.IPv6 != "" { ni.InfName, err = getInterfaceNameUsingIP(ni.IPv6) if err != nil { - framework.Logf("failed to get network interface name using IPv4 address %s: %v", ni.IPv6, err) + framework.Logf("failed to get network interface name using IPv6 address %s: %v", ni.IPv6, err) } } ni.IPv6Prefix, err = getContainerNetwork(inspectNetworkIPv6PrefixKeyStr) diff --git a/test/e2e/service.go b/test/e2e/service.go index 2180fcc595..1de696e6e0 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -830,6 +830,151 @@ var _ = ginkgo.Describe("Services", feature.Service, func() { // network is removed by provider Context API }) + ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a NodePort service via a different node", func(ctx context.Context) { + const ( + serviceName = "svc-udp" + srcPort = 12345 + podClient = "pod-client" + podBackend1 = "pod-server-1" + podBackend2 = "pod-server-2" + ) + var clientNodeInfo, serverNodeInfo, backendNodeInfo nodeInfo + + cs := f.ClientSet + ns := f.Namespace.Name + + nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + e2eskipper.Skipf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + family := v1.IPv4Protocol + if IsIPv6Cluster(cs) { + family = v1.IPv6Protocol + } + + ips := e2enode.GetAddressesByTypeAndFamily(&nodes.Items[0], v1.NodeInternalIP, family) + gomega.Expect(ips).ToNot(gomega.BeEmpty()) + + clientNodeInfo = nodeInfo{ + name: nodes.Items[0].Name, + nodeIP: ips[0], + } + + ips = e2enode.GetAddressesByTypeAndFamily(&nodes.Items[1], v1.NodeInternalIP, family) + gomega.Expect(ips).ToNot(gomega.BeEmpty()) + + backendNodeInfo = nodeInfo{ + name: nodes.Items[1].Name, + nodeIP: ips[0], + } + + ips = e2enode.GetAddressesByTypeAndFamily(&nodes.Items[2], v1.NodeInternalIP, family) + 
gomega.Expect(ips).ToNot(gomega.BeEmpty())
+
+		serverNodeInfo = nodeInfo{
+			name:   nodes.Items[2].Name,
+			nodeIP: ips[0],
+		}
+
+		// Create a NodePort service
+		udpJig := e2eservice.NewTestJig(cs, ns, serviceName)
+		ginkgo.By("creating a UDP service " + serviceName + " with type=NodePort in " + ns)
+		udpService, err := udpJig.CreateUDPService(ctx, func(svc *v1.Service) {
+			svc.Spec.Type = v1.ServiceTypeNodePort
+			svc.Spec.Ports = []v1.ServicePort{
+				{Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)},
+			}
+		})
+		framework.ExpectNoError(err)
+
+		// Create a client pod on one node that generates UDP traffic against the NodePort service every 5 seconds
+		ginkgo.By("creating a client pod for probing the service " + serviceName)
+		clientPod := e2epod.NewAgnhostPod(ns, podClient, nil, nil, nil)
+		nodeSelection := e2epod.NodeSelection{Name: clientNodeInfo.name}
+		e2epod.SetNodeSelection(&clientPod.Spec, nodeSelection)
+		cmd := fmt.Sprintf(`date; for i in $(seq 1 3000); do echo "$(date) Try: ${i}"; echo hostname | nc -u -w 5 -p %d %s %d; echo; done`, srcPort, serverNodeInfo.nodeIP, udpService.Spec.Ports[0].NodePort)
+		clientPod.Spec.Containers[0].Command = []string{"/bin/sh", "-c", cmd}
+		clientPod.Spec.Containers[0].Name = podClient
+		e2epod.NewPodClient(f).CreateSync(ctx, clientPod)
+
+		// Read the client pod logs
+		logs, err := e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient)
+		framework.ExpectNoError(err)
+		framework.Logf("Pod client logs: %s", logs)
+
+		// Add a backend pod to the service on another node
+		ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName + " at node " + backendNodeInfo.name)
+		serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80))
+		serverPod1.Labels = udpJig.Labels
+		nodeSelection = e2epod.NodeSelection{Name: backendNodeInfo.name}
+		e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection)
+		e2epod.NewPodClient(f).CreateSync(ctx, serverPod1)
+
+		ginkgo.By("Waiting for the endpoint to be ready")
+		err = framework.WaitForServiceEndpointsNum(ctx, f.ClientSet, f.Namespace.Name,
+			serviceName, 1, time.Second, wait.ForeverTestTimeout)
+		framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
+			serviceName, f.Namespace.Name)
+
+		logContainsFn := func(text, podName string) wait.ConditionWithContextFunc {
+			return func(ctx context.Context) (bool, error) {
+				logs, err := e2epod.GetPodLogs(ctx, cs, ns, podName, podName)
+				if err != nil {
+					// Retry the error next time.
+					return false, nil
+				}
+				if !strings.Contains(string(logs), text) {
+					return false, nil
+				}
+				return true, nil
+			}
+		}
+		// Note that the fact that the Endpoints object already exists does NOT mean
+		// that the openflows were already programmed.
+		// Additionally, take into account that the UDP conntrack entry timeout is
+		// 30 seconds by default.
+		// Based on the above, check that the pod receives the traffic.
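+		// The poll below retries every 5s for up to a minute, which comfortably
+		// outlives the default 30s conntrack window.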
+ ginkgo.By("checking client pod connected to the backend 1 on Node IP " + serverNodeInfo.nodeIP) + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, logContainsFn(podBackend1, podClient)); err != nil { + logs, err = e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient) + framework.ExpectNoError(err) + framework.Logf("Pod client logs: %s", logs) + framework.Failf("Failed to connect to backend 1") + } + + // Create a second pod + ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName + " at node " + backendNodeInfo.name) + serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) + serverPod2.Labels = udpJig.Labels + nodeSelection = e2epod.NodeSelection{Name: backendNodeInfo.name} + e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) + e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) + + // and delete the first pod + framework.Logf("Cleaning up %s pod", podBackend1) + e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) + + ginkgo.By("Waiting for the endpoint to be ready") + err = framework.WaitForServiceEndpointsNum(ctx, f.ClientSet, f.Namespace.Name, + serviceName, 1, time.Second, wait.ForeverTestTimeout) + framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", + serviceName, f.Namespace.Name) + + // Check that the second pod keeps receiving traffic + // UDP conntrack entries timeout is 30 sec by default + ginkgo.By("checking client pod connected to the backend 2 on Node IP " + serverNodeInfo.nodeIP) + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, logContainsFn(podBackend2, podClient)); err != nil { + logs, err = e2epod.GetPodLogs(ctx, cs, ns, podClient, podClient) + framework.ExpectNoError(err) + framework.Logf("Pod client logs: %s", logs) + framework.Failf("Failed to connect to backend 2") + } + }) + ginkgo.It("should listen on each host addresses", func() { endPoints := make([]*v1.Pod, 0) endpointsSelector := map[string]string{"servicebackend": "true"} diff --git a/test/e2e/util.go b/test/e2e/util.go index 08a9568d2d..5a9715d4a8 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1243,6 +1243,7 @@ func wrappedTestFramework(basename string) *framework.Framework { func newPrivelegedTestFramework(basename string) *framework.Framework { f := framework.NewDefaultFramework(basename) f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged + f.NamespacePodSecurityWarnLevel = admissionapi.LevelPrivileged f.DumpAllNamespaceInfo = func(ctx context.Context, f *framework.Framework, namespace string) { debug.DumpAllNamespaceInfo(context.TODO(), f.ClientSet, namespace) } @@ -1444,6 +1445,11 @@ func isLocalGWModeEnabled() bool { return present && val == "local" } +func isHelmEnabled() bool { + val, present := os.LookupEnv("USE_HELM") + return present && val == "true" +} + func isPreConfiguredUdnAddressesEnabled() bool { ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() val := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), "OVN_PRE_CONF_UDN_ADDR_ENABLE") diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index b9be17fd9f..e9cee42c41 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -187,6 +187,11 @@ else # https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5569 skip "Multi Homing" fi + if [ "$PLATFORM_IPV4_SUPPORT" == true ] && [ 
"$PLATFORM_IPV6_SUPPORT" == false ]; then + # Skip IPv6/dual-stack multihoming secondary network tests in IPv4-only clusters. + skip "Multi Homing.*L3 - routed - secondary network with IPv6 subnet" + skip "Multi Homing.*L3 - routed - secondary network with a dual stack configuration" + fi # these tests require metallb but the configuration we do for it is not compatible with the configuration we do to advertise the default network # TODO: consolidate configuration skip "Load Balancer Service Tests with MetalLB" @@ -225,6 +230,20 @@ if [ "${PARALLEL:-false}" = "true" ]; then skip_label "$SERIAL_LABEL" fi +if [ "$ENABLE_NO_OVERLAY" == true ]; then + # No-overlay mode uses underlying network infrastructure directly. + # Overlay-dependent features are not supported. + skip_label "Feature:Multicast" + skip_label "Feature:EgressIP" + skip_label "Feature:EgressService" + # This test validates MTU reduction behavior specific to overlay mode (1500->1400). + # In no-overlay mode, pods use the full underlying network MTU without reduction. + skip "blocking ICMP needs frag" + # This test validates MTU reduction due to Geneve encapsulation overhead (1400->1342). + # In no-overlay mode, there is no encapsulation and thus no MTU overhead. + skip "Pod to pod TCP with low MTU" +fi + # setting these is required to make RuntimeClass tests work ... :/ export KUBE_CONTAINER_RUNTIME=remote export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock