diff --git a/.github/actions/cleanup-action/package-lock.json b/.github/actions/cleanup-action/package-lock.json index 322555b10a..a2d1053b11 100644 --- a/.github/actions/cleanup-action/package-lock.json +++ b/.github/actions/cleanup-action/package-lock.json @@ -741,9 +741,9 @@ } }, "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", "dev": true }, "ansi-styles": { @@ -3446,9 +3446,9 @@ } }, "minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", "dev": true }, "mixin-deep": { @@ -3953,9 +3953,9 @@ }, "dependencies": { "ansi-regex": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.0.tgz", - "integrity": "sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.1.tgz", + "integrity": "sha512-ILlv4k/3f6vfQ4OoP2AGvirOktlQ98ZEL1k9FaQjxa3L1abBgbuTDAdPOpvbGncC0BTVQrl+OM8xZGK6tWXt7g==", "dev": true } } @@ -4642,9 +4642,9 @@ }, "dependencies": { "ansi-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", - "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.1.tgz", + "integrity": "sha512-+O9Jct8wf++lXxxFc4hc8LsjaSq0HFzzL7cVsw8pRDIPdjKD2mT4ytDZlLuSBZ4cLKZFXIrMGO7DbQCtMJJMKw==", "dev": true }, "strip-ansi": { @@ -4710,9 +4710,9 @@ }, "dependencies": { "ansi-regex": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.0.tgz", - "integrity": "sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.1.tgz", + "integrity": "sha512-ILlv4k/3f6vfQ4OoP2AGvirOktlQ98ZEL1k9FaQjxa3L1abBgbuTDAdPOpvbGncC0BTVQrl+OM8xZGK6tWXt7g==", "dev": true } } diff --git a/.github/actions/retest-action/entrypoint.sh b/.github/actions/retest-action/entrypoint.sh index 5526617c5f..d24485b99e 100755 --- a/.github/actions/retest-action/entrypoint.sh +++ b/.github/actions/retest-action/entrypoint.sh @@ -2,16 +2,61 @@ set -ex +############################## +# Prerequisites check +############################## + if ! jq -e '.issue.pull_request' ${GITHUB_EVENT_PATH}; then echo "Not a PR... Exiting." exit 0 fi -if [ "$(jq -r '.comment.body' ${GITHUB_EVENT_PATH})" != "/retest" ]; then - echo "Nothing to do... Exiting." +COMMENT_BODY=$(jq -r '.comment.body' ${GITHUB_EVENT_PATH}) +if [ "${COMMENT_BODY}" != "/retest" ] && + [ "${COMMENT_BODY}" != "/retest-failed" ] && + [ "${COMMENT_BODY}" != "/cancel" ] && + [ "${COMMENT_BODY}" != "/help" ]; then + echo "Unknown action. Nothing to do... 
Exiting." exit 0 fi +############################## +# functions section +############################## + +send_reaction() { + local REACTION_SYMBOL="$1" + local REACTION_URL="$(jq -r '.comment.url' ${GITHUB_EVENT_PATH})/reactions" + curl --request POST \ + --url "${REACTION_URL}" \ + --header "authorization: Bearer ${GITHUB_TOKEN}" \ + --header "accept: application/vnd.github.squirrel-girl-preview+json" \ + --header "content-type: application/json" \ + --data '{ "content" : "'${REACTION_SYMBOL}'" }' +} + +send_comment() { + local COMMENT="$1" + local COMMENTS_URL=$(jq -r '.issue.comments_url' ${GITHUB_EVENT_PATH}) + curl --request POST \ + --url "${COMMENTS_URL}" \ + --header "authorization: Bearer ${GITHUB_TOKEN}" \ + --header "accept: application/vnd.github.squirrel-girl-preview+json" \ + --header "content-type: application/json" \ + --data '{ "body" : "'"${COMMENT}"'" }' +} + +############################## +# logic section +############################## + +ACTION="${COMMENT_BODY}" + +if [ "$ACTION" == "/help" ]; then + send_comment "Supported operations are /retest, /retest-failed, /cancel" + exit 0 +fi + PR_URL=$(jq -r '.issue.pull_request.url' ${GITHUB_EVENT_PATH}) curl --request GET \ @@ -27,19 +72,38 @@ curl --request GET \ --header "authorization: Bearer ${GITHUB_TOKEN}" \ --header "content-type: application/json" | jq '.workflow_runs | max_by(.run_number)' > run.json -RERUN_URL=$(jq -r '.rerun_url' run.json) +ACTION_URL="" +if [ "$ACTION" == "/retest" ]; then + ACTION_URL=$(jq -r '.rerun_url' run.json) +elif [ "$ACTION" == "/retest-failed" ]; then + # New feature, rerun failed jobs: + # https://docs.github.com/en/rest/reference/actions#re-run-failed-jobs-from-a-workflow-run + RERUN_URL=$(jq -r '.rerun_url' run.json) + ACTION_URL=${RERUN_URL}-failed-jobs +elif [ "$ACTION" == "/cancel" ]; then + ACTION_URL=$(jq -r '.cancel_url' run.json) +else + echo "Something went wrong, unsupported action" + exit 0 +fi -curl --request POST \ - --url "${RERUN_URL}" \ +# Execute the action. +# Store the response code in a variable. +# Store the answer in file .action-response.json. +RESPONSE_CODE=$(curl --write-out '%{http_code}' --silent --output .action-response.json --request POST \ + --url "${ACTION_URL}" \ --header "authorization: Bearer ${GITHUB_TOKEN}" \ - --header "content-type: application/json" + --header "content-type: application/json") -REACTION_URL="$(jq -r '.comment.url' ${GITHUB_EVENT_PATH})/reactions" +REACTION_SYMBOL="rocket" +if ! echo ${RESPONSE_CODE} | egrep -q '^2'; then + REACTION_SYMBOL="confused" +fi +send_reaction "${REACTION_SYMBOL}" -curl --request POST \ - --url "${REACTION_URL}" \ - --header "authorization: Bearer ${GITHUB_TOKEN}" \ - --header "accept: application/vnd.github.squirrel-girl-preview+json" \ - --header "content-type: application/json" \ - --data '{ "content" : "rocket" }' \ No newline at end of file +# In case we received a non 2xx response code, relay the error message as a comment. +if ! 
echo ${RESPONSE_CODE} | egrep -q '^2'; then + RESPONSE_MESSAGE=$(jq -r '.message' .action-response.json) + send_comment "Oops, something went wrong:\n~~~\n${RESPONSE_MESSAGE}\n~~~\n" +fi diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 859bcbb7d4..3d3f27246f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,20 +19,10 @@ env: # This must be a directory CI_IMAGE_CACHE: /tmp/image_cache/ - # This must be a directory - CI_RUN_LOG_CACHE: /tmp/run_logs/ - # This must be a file - CI_LAST_RUN_STATUS_CACHE: /tmp/last_run_status - CI_IMAGE_MASTER_TAR: image-master.tar CI_IMAGE_PR_TAR: image-pr.tar CI_DIST_IMAGES_OUTPUT: dist/images/_output/ - CI_LOGS_OVN_UPGRADE: logs_ovn_upgrade.txt - CI_LOGS_SHARD_CONFORMANCE: logs_shard_conformance.txt - CI_LOGS_GENERIC: logs.txt - CI_LOGS_DUAL_STACK: dual_stack_logs.txt - CI_LOGS_SINGLE_STACK: single_stack_logs.txt jobs: # separate job for parallelism @@ -46,7 +36,7 @@ jobs: - name: Verify uses: golangci/golangci-lint-action@v2 with: - version: v1.33.2 + version: v1.45.2 working-directory: go-controller args: --modules-download-mode=vendor --timeout=15m0s --verbose @@ -258,83 +248,46 @@ jobs: OVN_GATEWAY_MODE: "${{ matrix.gateway-mode }}" OVN_MULTICAST_ENABLE: "false" steps: - # This will write to key ${{ env.JOB_NAME }}-${{ github.run_id }} - - name: Initialize last run status cache - id: last_run_status_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_LAST_RUN_STATUS_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id }}-last-run-status - - # The last run status comes from the run_cache file in the cache - # Verify all of the following steps. Only execute them if the cache does not - # contain: steps.last_run_status.outputs.STATUS != 'completed' and if none - # of the previous steps have failed - - name: Fetch last run status from file in cache - id: last_run_status - run: | - if [ -f ${CI_LAST_RUN_STATUS_CACHE} ]; then - cat ${CI_LAST_RUN_STATUS_CACHE} - fi - - # Create a cache for test results - - name: Create cache for run logs - id: run_log_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_RUN_LOG_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id }}-run-logs - - name: Set up Go - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/setup-go@v1 with: go-version: ${{ env.GO_VERSION }} id: go - name: Set up environment - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | export GOPATH=$(go env GOPATH) echo "GOPATH=$GOPATH" >> $GITHUB_ENV echo "$GOPATH/bin" >> $GITHUB_PATH - name: Free up disk space - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: sudo eatmydata apt-get remove --auto-remove -y aspnetcore-* dotnet-* libmono-* mono-* msbuild php-* php7* ghc-* zulu-* - name: Download test-image-master - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/download-artifact@v2 with: name: test-image-master - name: Disable ufw - if: steps.last_run_status.outputs.STATUS != 'completed' && success() # For IPv6 and Dualstack, ufw (Uncomplicated Firewall) should be disabled. # Not needed for KIND deployments, so just disable all the time. 
run: | sudo ufw disable - name: Load docker image - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | docker load --input ${CI_IMAGE_MASTER_TAR} - name: Check out code into the Go module directory - from PR branch - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/checkout@v2 - name: kind setup - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | export OVN_IMAGE="ovn-daemonset-f:dev" make -C test install-kind - name: Export kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() run: | mkdir -p /tmp/kind/logs kind export logs --name ${KIND_CLUSTER_NAME} --loglevel=debug /tmp/kind/logs @@ -345,69 +298,43 @@ jobs: docker exec ovn-worker2 crictl images - name: Upload kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }} path: /tmp/kind/logs - name: Download test-image-pr - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/download-artifact@v2 with: name: test-image-pr - name: Load docker image - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | docker load --input ${CI_IMAGE_PR_TAR} - name: ovn upgrade - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_OVN_UPGRADE}) 2>&1 export OVN_IMAGE="ovn-daemonset-f:pr" make -C test upgrade-ovn - name: Run E2E shard-conformance - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_SHARD_CONFORMANCE}) 2>&1 make -C test shard-conformance - name: Export kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() run: | mkdir -p /tmp/kind/logs-kind-pr-branch kind export logs --name ${KIND_CLUSTER_NAME} --loglevel=debug /tmp/kind/logs-kind-pr-branch - name: Upload kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }}-after-upgrade path: /tmp/kind/logs-kind-pr-branch - # The following steps will run if the job is marked completed and no step failed - - name: Display run logs from successful tests - if: steps.last_run_status.outputs.STATUS == 'completed' && success() - continue-on-error: true - run: | - if [ -f ${CI_RUN_LOG_CACHE}/${CI_LOGS_OVN_UPGRADE} ]; then - cat ${CI_RUN_LOG_CACHE}/${CI_LOGS_OVN_UPGRADE} - fi - if [ -f ${CI_RUN_LOG_CACHE}/${CI_LOGS_SHARD_CONFORMANCE} ]; then - cat ${CI_RUN_LOG_CACHE}/${CI_LOGS_SHARD_CONFORMANCE} - fi - - # This will set the name=STATUS to 'completed' if none of the above steps - # failed - - name: Set last run status to completed - run: | - echo '::set-output name=STATUS::completed' > ${CI_LAST_RUN_STATUS_CACHE} - e2e: name: e2e if: github.event_name != 'schedule' @@ -463,52 +390,20 @@ jobs: KIND_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" KIND_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" steps: - # This will write to key ${{ env.JOB_NAME }}-${{ github.run_id }} - - name: Initialize last run status cache - id: last_run_status_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_LAST_RUN_STATUS_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id 
}}-last-run-status - - # The last run status comes from the run_cache file in the cache - # Verify all of the following steps. Only execute them if the cache does not - # contain: steps.last_run_status.outputs.STATUS != 'completed' and if none - # of the previous steps have failed - - name: Fetch last run status from file in cache - id: last_run_status - run: | - if [ -f ${CI_LAST_RUN_STATUS_CACHE} ]; then - cat ${CI_LAST_RUN_STATUS_CACHE} - fi - - # Create a cache for test results - - name: Create cache for run logs - id: run_log_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_RUN_LOG_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id }}-run-logs - name: Free up disk space - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: sudo eatmydata apt-get remove --auto-remove -y aspnetcore-* dotnet-* libmono-* mono-* msbuild php-* php7* ghc-* zulu-* - name: Set up Go - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/setup-go@v2 with: go-version: ${{ env.GO_VERSION }} id: go - name: Check out code into the Go module directory - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/checkout@v2 - name: Set up environment - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | export GOPATH=$(go env GOPATH) echo "GOPATH=$GOPATH" >> $GITHUB_ENV @@ -519,76 +414,53 @@ jobs: fi - name: Disable ufw - if: steps.last_run_status.outputs.STATUS != 'completed' && success() # For IPv6 and Dualstack, ufw (Uncomplicated Firewall) should be disabled. # Not needed for KIND deployments, so just disable all the time. run: | sudo ufw disable - name: Download test-image-pr - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/download-artifact@v2 with: name: test-image-pr - name: Load docker image - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | docker load --input ${CI_IMAGE_PR_TAR} - name: kind setup - if: steps.last_run_status.outputs.STATUS != 'completed' && success() timeout-minutes: 30 run: | export OVN_IMAGE="ovn-daemonset-f:pr" make -C test install-kind - name: Run Tests - if: steps.last_run_status.outputs.STATUS != 'completed' && success() # e2e tests take ~60 minutes normally, 90 should be more than enough # set 2 1/2 hours for control-plane tests as these might take a while timeout-minutes: ${{ matrix.target == 'control-plane' && 150 || 90 }} run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_GENERIC}) 2>&1 make -C test ${{ matrix.target }} - # The following steps will always run unless the job is marked as completed - name: Upload Junit Reports - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-junit-${{ env.JOB_NAME }}-${{ github.run_id }} path: './test/_artifacts/*.xml' - name: Export kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() run: | mkdir -p /tmp/kind/logs kind export logs --name ${KIND_CLUSTER_NAME} --loglevel=debug /tmp/kind/logs - name: Upload kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }} path: /tmp/kind/logs - # The following steps will run if the job is marked completed and no step failed - - name: Display run logs from successful tests - if: steps.last_run_status.outputs.STATUS == 'completed' && success() 
- continue-on-error: true - run: | - if [ -f ${CI_RUN_LOG_CACHE}/${CI_LOGS_GENERIC} ]; then - cat ${CI_RUN_LOG_CACHE}/${CI_LOGS_GENERIC} - fi - - # This will set the name=STATUS to 'completed' if none of the above steps - # failed - - name: Set last run status to completed - run: | - echo '::set-output name=STATUS::completed' > ${CI_LAST_RUN_STATUS_CACHE} - e2e-dual-conversion: name: e2e-dual-conversion if: github.event_name != 'schedule' @@ -608,145 +480,82 @@ jobs: OVN_GATEWAY_MODE: "${{ matrix.gateway-mode }}" OVN_MULTICAST_ENABLE: "false" steps: - # This will write to key ${{ env.JOB_NAME }}-${{ github.run_id }} - - name: Initialize last run status cache - id: last_run_status_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_LAST_RUN_STATUS_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id }}-last-run-status - - # The last run status comes from the run_cache file in the cache - # Verify all of the following steps. Only execute them if the cache does not - # contain: steps.last_run_status.outputs.STATUS != 'completed' and if none - # of the previous steps have failed - - name: Fetch last run status from file in cache - id: last_run_status - run: | - if [ -f ${CI_LAST_RUN_STATUS_CACHE} ]; then - cat ${CI_LAST_RUN_STATUS_CACHE} - fi - - # Create a cache for test results - - name: Create cache for run logs - id: run_log_cache - uses: actions/cache@v2 - with: - path: | - ${{ env.CI_RUN_LOG_CACHE }} - key: ${{ env.JOB_NAME }}-${{ github.run_id }}-run-logs - name: Set up Go - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/setup-go@v2 with: go-version: ${{ env.GO_VERSION }} id: go - name: Check out code into the Go module directory - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/checkout@v2 - name: Set up environment - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | export GOPATH=$(go env GOPATH) echo "GOPATH=$GOPATH" >> $GITHUB_ENV echo "$GOPATH/bin" >> $GITHUB_PATH - name: Disable ufw - if: steps.last_run_status.outputs.STATUS != 'completed' && success() # For IPv6 and Dualstack, ufw (Uncomplicated Firewall) should be disabled. # Not needed for KIND deployments, so just disable all the time. 
run: | sudo ufw disable - name: Download test-image-pr - if: steps.last_run_status.outputs.STATUS != 'completed' && success() uses: actions/download-artifact@v2 with: name: test-image-pr - name: Load docker image - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | docker load --input ${CI_IMAGE_PR_TAR} - name: kind IPv4 setup - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | export OVN_IMAGE="ovn-daemonset-f:pr" make -C test install-kind - name: Run Single-Stack Tests - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_SINGLE_STACK}) 2>&1 make -C test shard-test WHAT="Networking Granular Checks" - name: Convert IPv4 cluster to Dual Stack - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | ./contrib/kind-dual-stack-conversion.sh - name: Run Dual-Stack Tests - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_DUAL_STACK}) 2>&1 KIND_IPV4_SUPPORT="true" KIND_IPV6_SUPPORT="true" make -C test shard-test WHAT="Networking Granular Checks\|DualStack" - name: Run Dual-Stack Control-Plane Tests - if: steps.last_run_status.outputs.STATUS != 'completed' && success() run: | - mkdir -p ${CI_RUN_LOG_CACHE} - exec > >(tee -a ${CI_RUN_LOG_CACHE}/${CI_LOGS_DUAL_STACK}) 2>&1 KIND_IPV4_SUPPORT="true" KIND_IPV6_SUPPORT="true" make -C test control-plane WHAT="DualStack" - name: Upload Junit Reports - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-junit-${{ env.JOB_NAME }}-${{ github.run_id }} path: './test/_artifacts/*.xml' - name: Export kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() run: | mkdir -p /tmp/kind/logs kind export logs --name ${KIND_CLUSTER_NAME} --loglevel=debug /tmp/kind/logs - name: Upload kind logs - if: steps.last_run_status.outputs.STATUS != 'completed' && always() + if: always() uses: actions/upload-artifact@v2 with: name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }} path: /tmp/kind/logs - # The following steps will run if the job is marked completed and no step failed - - name: Display run logs from successful tests - if: steps.last_run_status.outputs.STATUS == 'completed' && success() - continue-on-error: true - run: | - if [ -f ${CI_RUN_LOG_CACHE}/${CI_LOGS_SINGLE_STACK} ]; then - cat ${CI_RUN_LOG_CACHE}/${CI_LOGS_SINGLE_STACK} - fi - if [ -f ${CI_RUN_LOG_CACHE}/${CI_LOGS_DUAL_STACK} ]; then - cat ${CI_RUN_LOG_CACHE}/${CI_LOGS_DUAL_STACK} - fi - - # This will set the name=STATUS to 'completed' if none of the above steps - # failed - - name: Set last run status to completed - run: | - echo '::set-output name=STATUS::completed' > ${CI_LAST_RUN_STATUS_CACHE} - e2e-periodic: name: e2e-periodic if: github.event_name == 'schedule' diff --git a/contrib/kind.yaml.j2 b/contrib/kind.yaml.j2 index 41853f3dab..ee2409332d 100644 --- a/contrib/kind.yaml.j2 +++ b/contrib/kind.yaml.j2 @@ -14,10 +14,6 @@ networking: {%- if ovn_ip_family %} ipFamily: {{ ovn_ip_family }} {%- endif %} -featureGates: -{%- if ovn_ip_family == "dual" %} - IPv6DualStack: true -{%- endif %} {%- if use_local_registy == "true"%} containerdConfigPatches: - |- diff --git a/dist/images/Dockerfile.fedora b/dist/images/Dockerfile.fedora index fe31c603ea..e43ea2cd6b 100644 --- a/dist/images/Dockerfile.fedora +++ 
b/dist/images/Dockerfile.fedora @@ -15,7 +15,7 @@ USER root ENV PYTHONDONTWRITEBYTECODE yes -ARG ovnver=ovn-21.12.0-4.fc35 +ARG ovnver=ovn-21.12.0-5.fc35 # Automatically populated when using docker buildx ARG TARGETPLATFORM ARG BUILDPLATFORM diff --git a/docs/design/external_traffic_policy.md b/docs/design/service_traffic_policy.md similarity index 54% rename from docs/design/external_traffic_policy.md rename to docs/design/service_traffic_policy.md index 78adc6f86e..cca72e3b0e 100644 --- a/docs/design/external_traffic_policy.md +++ b/docs/design/service_traffic_policy.md @@ -1,14 +1,15 @@ -# K8's Services' ExternalTraffic Policy Implementation +# Kubernetes Service Traffic Policy Implementation -## Background + +## External Traffic Policy For [Kubernetes Services](https://kubernetes.io/docs/concepts/services-networking/service/) of type Nodeport or Loadbalancer a user can set the `service.spec.externalTrafficPolicy` field to either `cluster` or `local` to denote whether or not external traffic is routed to cluster-wide or node-local endpoints. The default value for the `externalTrafficPolicy` field is `cluster`. In this configuration in ingress traffic is equally disributed across all backends and the original client IP address is lost due to SNAT. If set to `local` then the client -source IP is propagated through the service and to the destination, while service traffic arriving at nodes without -local endpoints is dropped. +source IP is preserved throughout the service flow and if service traffic arrives at nodes without +local endpoints it gets dropped. See [sources](#sources) for more information on ETP=local. Setting an `ExternalTrafficPolicy` to `Local` is only allowed for Services of type `NodePort` or `LoadBalancer`. The APIServer enforces this requirement. @@ -18,7 +19,9 @@ APIServer enforces this requirement. To properly implement this feature for all relevant traffic flows, required changing how OVN, Iptables rules, and Physical OVS flows are updated and managed in OVN-Kubernetes -## OVN Load_Balancer configuration +## ExternalTrafficPolicy=Local + +### OVN Load_Balancer configuration Normally, each service in Kubernetes has a corresponding single Load_Balancer row created in OVN. This LB is attached to all node switches and gateway routers (GWRs). ExternalTrafficPolicy creates multiple LBs, however. @@ -34,19 +37,21 @@ All externally-accessible vips (NodePort, ExternalIPs, LoadBalancer Status IPs) will reside on this loadbalancer. The loadbalancer backends may be empty, depending on whether there are pods local to that node. -## Handling Flows between the overlay and underlay +### Handling Flows between the overlay and underlay In this section we will look at some relevant traffic flows when a service's `externalTrafficPolicy` is `local`. For these examples we will be using a Nodeport service, but the flow is generally the same for ExternalIP and Loadbalancer type services. -## Ingress Traffic +### Ingress Traffic This section will cover the networking entities hit when traffic ingresses a cluster via a service to either host networked pods or cluster networked pods. If its host networked pods, then the traffic flow is the same on both gateway modes. If its cluster networked pods, they will be different for each mode. 
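+
+For reference, the examples in the rest of this document use a two-replica NodePort service named `example-service-1` with backends listening on port 8080. A rough way to create an equivalent service for experimentation is sketched below; the image and exact flags are assumptions for illustration, not taken from the test manifests used by CI:
+
+```
+kubectl create deployment example-service-1 --replicas=2 \
+  --image=registry.k8s.io/e2e-test-images/agnhost:2.39 -- /agnhost netexec --http-port=8080
+kubectl expose deployment example-service-1 --type=NodePort --port=80 --target-port=8080
+# switch the service to node-local endpoints, which is what the flows below demonstrate
+kubectl patch svc example-service-1 -p '{"spec":{"externalTrafficPolicy":"Local"}}'
+```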
### External Source -> Service -> OVN pod +#### **Shared Gateway Mode** + This case is the same as normal shared gateway traffic ingress, meaning the externally sourced traffic is routed into OVN via flows on breth0, except in this case the new local load balancer is hit on the GR, which ensures the ip of the client is preserved by the time it gets to the destination Pod. @@ -73,22 +78,22 @@ eth0--->|breth0| ``` -1. Match on the incoming traffic via it's nodePort, send it to `table=6`: +1. Match on the incoming traffic via default flow on `table0`, send it to `table1`: ``` - cookie=0xb4e7084fbba8bb8a, duration=100.388s, table=0, n_packets=6, n_bytes=484, idle_age=9, priority=110,tcp,in_port=1,tp_dst=30820 actions=ct(commit,table=6,zone=64003) +cookie=0xdeff105, duration=3189.786s, table=0, n_packets=99979, n_bytes=298029215, priority=50,ip,in_port=eth0 actions=ct(table=1,zone=64000) ``` 2. Send it out to LOCAL ovs port on breth0 and traffic is delivered to the host: ``` - cookie=0xe745ecf105, duration=119.391s, table=6, n_packets=6, n_bytes=484, idle_age=28, priority=110 actions=LOCAL +cookie=0xdeff105, duration=3189.787s, table=1, n_packets=108, n_bytes=23004, priority=0 actions=NORMAL ``` 3. In the host, we have an IPtable rule in the PREROUTING chain that DNATs this packet matched on nodePort to a masqueradeIP (169.254.169.3) used specially for this traffic flow. ``` -[1:60] -A OVN-KUBE-NODEPORT -p tcp -m addrtype --dst-type LOCAL -m tcp --dport 31787 -j DNAT --to-destination 169.254.169.3:31787 +[3:180] -A OVN-KUBE-NODEPORT -p tcp -m addrtype --dst-type LOCAL -m tcp --dport 31746 -j DNAT --to-destination 169.254.169.3:31746 ``` 4. The special masquerade route in the host sends this packet into OVN via the management port. @@ -100,7 +105,7 @@ eth0--->|breth0| 5. Since by default, all traffic into `ovn-k8s-mp0` gets SNAT-ed, we add an IPtable rule to `OVN-KUBE-SNAT-MGMTPORT` chain to ensure it doesn't get SNAT-ed to preserve its source-ip. ``` -[1:60] -A OVN-KUBE-SNAT-MGMTPORT -p tcp -m tcp --dport 31787 -j RETURN +[3:180] -A OVN-KUBE-SNAT-MGMTPORT -p tcp -m tcp --dport 31746 -j RETURN ``` 6. Traffic enters the node local switch on the worker node and hits the load-balancer where we add a new vip for this masqueradeIP to DNAT it correctly to the local backends. Note that this vip will translate only to the backends that are local to that worker node and hence traffic will be rejected if there is no local endpoint thus respecting ETP=local type traffic rules. @@ -116,7 +121,7 @@ name : "Service_default/example-service-1_TCP_node_switch_ovn-wor options : {event="false", reject="true", skip_snat="false"} protocol : tcp selection_fields : [] -vips : {"169.254.169.3:30820"="10.244.1.4:8080,10.244.1.5:8080", "172.18.0.4:30820"="10.244.1.4:8080,10.244.1.5:8080"} +vips : {"169.254.169.3:31746"="10.244.1.3:8080", "172.18.0.3:31746"="10.244.1.3:8080,10.244.2.3:8080"} ``` The switch load-balancer on a node without local endpoints will look like this: @@ -129,43 +134,37 @@ name : "Service_default/example-service-1_TCP_node_switch_ovn-wor options : {event="false", reject="true", skip_snat="false"} protocol : tcp selection_fields : [] -vips : {"169.254.169.3:30820"="", "172.18.0.3:30820"="10.244.1.4:8080,10.244.1.5:8080"} +vips : {"169.254.169.3:31746"="", "172.18.0.4:31746"="10.244.1.3:8080,10.244.2.3:8080"} ``` -Response traffic will follow the same path (backend->node switch->mp0->host->breth0). +Response traffic will follow the same path (backend->node switch->mp0->host->breth0->eth0). -7. 
Return traffic gets matched on src port being that of the nodePort and is sent to `table=7` +7. Return traffic gets matched on default flow in `table0` and it sent out via default interface back to the external source. ``` - cookie=0xb4e7084fbba8bb8a, duration=190.099s, table=0, n_packets=4, n_bytes=408, idle_age=99, priority=110,tcp,in_port=LOCAL,tp_src=30820 actions=ct(table=7,zone=64003) -``` - -8. Send the traffic back out breth0 back to the external source in `table=7` - -``` - cookie=0xe745ecf105, duration=210.560s, table=7, n_packets=4, n_bytes=408, priority=110 actions=output:eth0 +cookie=0xdeff105, duration=12994.192s, table=0, n_packets=47706, n_bytes=3199460, idle_age=0, priority=100,ip,in_port=LOCAL actions=ct(commit,zone=64000,exec(load:0x2->NXM_NX_CT_MARK[])),output:1 ``` The conntrack state looks like this: ``` -[NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 [UNREPLIED] src=172.18.0.4 dst=172.18.0.1 sport=30820 dport=30366 zone=64003 -[NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 [UNREPLIED] src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 -[NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=169.254.169.3 sport=30366 dport=30820 [UNREPLIED] src=10.244.1.4 dst=172.18.0.1 sport=8080 dport=30366 zone=9 -[NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=10.244.1.4 sport=30366 dport=8080 [UNREPLIED] src=10.244.1.4 dst=172.18.0.1 sport=8080 dport=30366 zone=14 -[UPDATE] tcp 6 60 SYN_RECV src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 -[UPDATE] tcp 6 432000 ESTABLISHED src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 [ASSURED] -[UPDATE] tcp 6 120 FIN_WAIT src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 [ASSURED] -[UPDATE] tcp 6 30 LAST_ACK src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 [ASSURED] -[UPDATE] tcp 6 120 TIME_WAIT src=172.18.0.1 dst=172.18.0.4 sport=30366 dport=30820 src=169.254.169.3 dst=172.18.0.1 sport=30820 dport=30366 [ASSURED] + [NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 [UNREPLIED] src=169.254.169.3 dst=172.18.0.1 sport=31746 dport=36366 + [NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=169.254.169.3 sport=36366 dport=31746 [UNREPLIED] src=10.244.1.3 dst=172.18.0.1 sport=8080 dport=36366 zone=9 + [NEW] tcp 6 120 SYN_SENT src=172.18.0.1 dst=10.244.1.3 sport=36366 dport=8080 [UNREPLIED] src=10.244.1.3 dst=172.18.0.1 sport=8080 dport=36366 zone=11 + [UPDATE] tcp 6 60 SYN_RECV src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 src=169.254.169.3 dst=172.18.0.1 sport=31746 dport=36366 + [UPDATE] tcp 6 432000 ESTABLISHED src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 src=169.254.169.3 dst=172.18.0.1 sport=31746 dport=36366 [ASSURED] + [NEW] tcp 6 300 ESTABLISHED src=172.18.0.3 dst=172.18.0.1 sport=31746 dport=36366 [UNREPLIED] src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 mark=2 zone=64000 + [UPDATE] tcp 6 120 FIN_WAIT src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 src=169.254.169.3 dst=172.18.0.1 sport=31746 dport=36366 [ASSURED] + [UPDATE] tcp 6 30 LAST_ACK src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 src=169.254.169.3 dst=172.18.0.1 sport=31746 dport=36366 [ASSURED] + [UPDATE] tcp 6 120 TIME_WAIT src=172.18.0.1 dst=172.18.0.3 sport=36366 dport=31746 src=169.254.169.3 dst=172.18.0.1 sport=31746 
dport=36366 [ASSURED] ``` ### External Source -> Service -> Host Networked pod This Scenario is a bit different, specifically traffic now needs to be directed from an external source to service and -then to the host itself(a host networked pod) +then to the host itself (a host networked pod) -In this flow, rather than going from breth0 into OVN we shortcircuit the path with physical flows on breth0 +In this flow, rather than going from breth0 into OVN we shortcircuit the path with physical flows on breth0. This is the same for both the gateway modes. ```text host (ovn-worker, 172.18.0.3) @@ -201,16 +200,16 @@ cookie=0x790ba3355d0c209b, duration=113.033s, table=6, n_packets=18, n_bytes=146 cookie=0x790ba3355d0c209b, duration=501.037s, table=7, n_packets=12, n_bytes=1259, idle_age=448, priority=100 actions=output:1 ``` -## Host Traffic +### Host Traffic This section will cover the networking entities hit when traffic travels from a cluster host via a service to either host -networked pods or cluster networked pods +networked pods or cluster networked pods. These flows are the same for both the gateway modes. ### Host -> Service -> OVN Pod This case is similar to steps 3-6 on `External -> Service -> OVN Pod` traffic senario we saw above for local gateway. The traffic will flow from host->PRE-ROUTING iptable rule DNAT towards `169.254.169.3`, which gets routed into `ovn-k8s-mp0` and hits the load balancer on the node-local-switch preserving sourceIP. -### Host -> Service -> Host +### Host -> Service -> Host Networked Pod Again when the backend is a host networked pod we shortcircuit OVN to avoid SNAT and use iptables rules on the host to DNAT directly to the correct host endpoint. @@ -219,9 +218,100 @@ to DNAT directly to the correct host endpoint. [0:0] -A OVN-KUBE-NODEPORT -p tcp -m addrtype --dst-type LOCAL -m tcp --dport 30940 -j REDIRECT --to-ports 8080 ``` -## Intra Cluster traffic +### Intra Cluster traffic For all service traffic that stays in the overlay the flows will remain the same for `externaltrafficpolicy:local`. If the traffic crosses over to the underlay then its not guaranteed to be the same. See https://bugzilla.redhat.com/show_bug.cgi?id=2027270. +## ExternalTrafficPolicy=Cluster + +#### **Local Gateway Mode** + +The implementation of this case differs for local gateway from that for shared gateway. In local gateway all service traffic is sent straight to host (instead of sending it to OVN) to allow users to apply custom routes according to their use cases. + +In local gateway mode, rather than sending the traffic from breth0 into OVN via gateway router, we use flows on breth0 to send it into the host. + +```text + host (ovn-worker, 172.18.0.3) ---- 172.18.0.3 LOCAL(host) -- iptables -- breth0 -- GR -- 10.244.1.3 pod + ^ + ^ + | +eth0--->|breth0| + +``` + +1. Match on the incoming traffic via default flow on `table0`, send it to `table1`: + +``` +cookie=0xdeff105, duration=3189.786s, table=0, n_packets=99979, n_bytes=298029215, priority=50,ip,in_port=eth0 actions=ct(table=1,zone=64000) +``` + +2. Send it out to LOCAL ovs port on breth0 and traffic is delivered to the host: + +``` +cookie=0xdeff105, duration=3189.787s, table=1, n_packets=108, n_bytes=23004, priority=0 actions=NORMAL +``` + +3. In the host, we have an IPtable rule in the PREROUTING chain that DNATs this packet matched on nodePort to its clusterIP:targetPort + +``` +[8:480] -A OVN-KUBE-NODEPORT -p tcp -m addrtype --dst-type LOCAL -m tcp --dport 31842 -j DNAT --to-destination 10.96.67.170:80 +``` + +4. 
The service route in the host sends this packet back to breth0.
+
+```
+10.96.0.0/16 via 172.18.0.1 dev breth0 mtu 1400
+```
+
+5. On breth0, we have priority 500 flows meant to handle hairpinning, which SNAT the srcIP to the special `169.254.169.2` masqueradeIP and send it to `table2`:
+
+```
+cookie=0xdeff105, duration=3189.786s, table=0, n_packets=11, n_bytes=814, priority=500,ip,in_port=LOCAL,nw_dst=10.96.0.0/16 actions=ct(commit,table=2,zone=64001,nat(src=169.254.169.2))
+```
+
+6. In `table2` we have a flow that forwards this to the patch port that takes the traffic into OVN:
+
+```
+cookie=0xdeff105, duration=6.308s, table=2, n_packets=11, n_bytes=814, actions=mod_dl_dst:02:42:ac:12:00:03,output:"patch-breth0_ov"
+```
+
+7. Traffic enters the GR on the worker node and hits the load-balancer where we DNAT it correctly to the local backends.
+
+The GR load-balancer on a node with endpoints for the clusterIP will look like this:
+
+```
+_uuid : b3201caf-3089-4462-b96e-1406fd7c4256
+external_ids : {"k8s.ovn.org/kind"=Service, "k8s.ovn.org/owner"="default/example-service-1"}
+health_check : []
+ip_port_mappings : {}
+name : "Service_default/example-service-1_TCP_cluster"
+options : {event="false", reject="true", skip_snat="false"}
+protocol : tcp
+selection_fields : []
+vips : {"10.96.67.170:80"="10.244.1.3:8080,10.244.2.3:8080"}
+```
+
+Response traffic will follow the same path (backend->GR->breth0->host->breth0->eth0).
+
+8. Return traffic gets matched on the priority 500 flow in `table0` which sends it to `table3`.
+
+```
+cookie=0xdeff105, duration=3189.786s, table=0, n_packets=10, n_bytes=540, priority=500,ip,in_port="patch-breth0_ov",nw_src=10.96.0.0/16,nw_dst=169.254.169.2 actions=ct(table=3,zone=64001,nat)
+```
+
+9. In `table3`, we send it to host:
+
+```
+cookie=0xdeff105, duration=6.308s, table=3, n_packets=10, n_bytes=540, actions=move:NXM_OF_ETH_DST[]->NXM_OF_ETH_SRC[],mod_dl_dst:02:42:ac:12:00:03,LOCAL
+```
+
+10. From host we send it back to breth0 using:
+
+```
+cookie=0xdeff105, duration=5992.878s, table=0, n_packets=89312, n_bytes=6154654, idle_age=0, priority=100,ip,in_port=LOCAL actions=ct(commit,zone=64000,exec(load:0x2->NXM_NX_CT_MARK[])),output:eth0
+```
+
+where the packet leaves the node and goes back to the external entity that initiated the connection.
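+
+The flow dumps, iptables rules, routes and load-balancer rows quoted throughout this walkthrough can be re-collected on a live node. A minimal verification sketch follows, assuming the commands are run from wherever `ovs-ofctl`, `iptables` and `ovn-nbctl` can reach the node's external bridge and the NB database; names such as `breth0` follow the examples above:
+
+```
+# OpenFlow tables on the external bridge (tables 0-3 referenced above)
+ovs-ofctl dump-flows breth0
+# nodePort DNAT rules installed in the nat table
+iptables -t nat -S OVN-KUBE-NODEPORT
+# host routes steering service traffic back to breth0
+ip route show
+# OVN load balancers, including the per-service cluster/local rows
+ovn-nbctl list load_balancer
+```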
+ ## Sources - https://www.asykim.com/blog/deep-dive-into-kubernetes-external-traffic-policies diff --git a/go-controller/go.mod b/go-controller/go.mod index 90981f1b04..cb348dab02 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -21,7 +21,7 @@ require ( github.com/onsi/gomega v1.14.0 github.com/openshift/api v0.0.0-20211201215911-5a82bae32e46 github.com/openshift/client-go v0.0.0-20211202194848-d3f186f2d366 - github.com/ovn-org/libovsdb v0.6.1-0.20220225160119-c8b4494412b1 + github.com/ovn-org/libovsdb v0.6.1-0.20220328142833-2cbe2d093e12 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.11.0 github.com/spf13/afero v1.4.1 @@ -52,8 +52,8 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect - github.com/go-logr/logr v1.2.0 // indirect - github.com/go-logr/stdr v1.1.0 // indirect + github.com/go-logr/logr v1.2.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.2 // indirect diff --git a/go-controller/go.sum b/go-controller/go.sum index 977a0c645e..a4e3640e7e 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -179,11 +179,11 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-logr/logr v1.1.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.0 h1:QK40JKJyMdUDz+h+xvCsru/bJhvG0UxvePV0ufL/AcE= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/stdr v1.1.0 h1:WSypkOvL7AfqHep42aGGLagyxUjJCQFFs/2nIitlVTQ= -github.com/go-logr/stdr v1.1.0/go.mod h1:Xff/PTSzmJ+zDsu/KDy4l6Axizfso1w7QcxLnWTdto4= +github.com/go-logr/logr v1.2.2 h1:ahHml/yUpnlb96Rp8HCvtYVPY8ZYpxq3g7UYchIYwbs= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -399,10 +399,8 @@ github.com/openshift/build-machinery-go v0.0.0-20210712174854-1bb7fd1518d3/go.mo github.com/openshift/client-go v0.0.0-20211202194848-d3f186f2d366 h1:/llKlbXC7F6rqTkAl2GE//9GDR0iDsqJEFvvJ1kLJXg= github.com/openshift/client-go v0.0.0-20211202194848-d3f186f2d366/go.mod h1:HJeHsH6mPyrrOVS/2j1zcztGAJG9ETKbqtlyohs84yY= github.com/ory/dockertest/v3 v3.8.0/go.mod h1:9zPATATlWQru+ynXP+DytBQrsXV7Tmlx7K86H6fQaDo= -github.com/ovn-org/libovsdb v0.6.1-0.20220127023511-a619f0fd93be h1:U8WVtNNTfBKj/5OE3uBe57oNJ+x5KSMl+3hM7iLSbdk= -github.com/ovn-org/libovsdb v0.6.1-0.20220127023511-a619f0fd93be/go.mod h1:aLvY7gPs/vLyJXF+PpZzvWlR5LB4QNJvBYIQMskJLZk= -github.com/ovn-org/libovsdb v0.6.1-0.20220225160119-c8b4494412b1 h1:nz+mlIM2KFAJGhrKb68KBl36rFn2mLAp+kfskm2nfl0= 
-github.com/ovn-org/libovsdb v0.6.1-0.20220225160119-c8b4494412b1/go.mod h1:aLvY7gPs/vLyJXF+PpZzvWlR5LB4QNJvBYIQMskJLZk= +github.com/ovn-org/libovsdb v0.6.1-0.20220328142833-2cbe2d093e12 h1:0omm+gRCOGkS2JyFoMYAF4NUrd8b4A+wnbooJZKH02g= +github.com/ovn-org/libovsdb v0.6.1-0.20220328142833-2cbe2d093e12/go.mod h1:BQPdnSM2QOKxPwxl7wHJDSPP4B/CDKq3+vzgFW3J5gE= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index addc0d1c9f..9c52c58ae8 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -95,9 +95,13 @@ func (n *NodeController) AddPod(pod *kapi.Pod) error { if !util.PodWantsNetwork(pod) { return nil } + if util.PodCompleted(pod) { + klog.Infof("Cleaning up hybrid overlay pod %s/%s because it has completed", pod.Namespace, pod.Name) + return n.DeletePod(pod) + } podIPs, podMAC, err := getPodDetails(pod) if err != nil { - klog.V(5).Infof("Cleaning up hybrid overlay pod %s/%s because %v", pod.Namespace, pod.Name, err) + klog.Warningf("Cleaning up hybrid overlay pod %s/%s because it has no OVN annotation %v", pod.Namespace, pod.Name, err) return n.DeletePod(pod) } diff --git a/go-controller/pkg/cni/cniserver.go b/go-controller/pkg/cni/cniserver.go index 41afb9926b..7e4ccc49d2 100644 --- a/go-controller/pkg/cni/cniserver.go +++ b/go-controller/pkg/cni/cniserver.go @@ -73,9 +73,10 @@ func NewCNIServer(rundir string, useOVSExternalIDs bool, factory factory.NodeWat podLister: corev1listers.NewPodLister(factory.LocalPodInformer().GetIndexer()), kclient: kclient, kubeAuth: &KubeAPIAuth{ - Kubeconfig: config.Kubernetes.Kubeconfig, - KubeAPIServer: config.Kubernetes.APIServer, - KubeAPIToken: config.Kubernetes.Token, + Kubeconfig: config.Kubernetes.Kubeconfig, + KubeAPIServer: config.Kubernetes.APIServer, + KubeAPIToken: config.Kubernetes.Token, + KubeAPITokenFile: config.Kubernetes.TokenFile, }, } diff --git a/go-controller/pkg/cni/cnishim.go b/go-controller/pkg/cni/cnishim.go index 39b954dd21..deb238ba85 100644 --- a/go-controller/pkg/cni/cnishim.go +++ b/go-controller/pkg/cni/cnishim.go @@ -150,6 +150,7 @@ func kubeClientsetFromConfig(auth *KubeAPIAuth) (*kubernetes.Clientset, error) { Kubeconfig: auth.Kubeconfig, APIServer: auth.KubeAPIServer, Token: auth.KubeAPIToken, + TokenFile: auth.KubeAPITokenFile, CAData: caData, }) } diff --git a/go-controller/pkg/cni/types.go b/go-controller/pkg/cni/types.go index db6d4acd83..286fa5a918 100644 --- a/go-controller/pkg/cni/types.go +++ b/go-controller/pkg/cni/types.go @@ -29,6 +29,9 @@ type KubeAPIAuth struct { KubeAPIServer string `json:"kube-api-server,omitempty"` // KubeAPIToken is a Kubernetes API token (not required if kubeconfig is given) KubeAPIToken string `json:"kube-api-token,omitempty"` + // KubeAPITokenFile is the path to Kubernetes API token + // If set, it is periodically read and takes precedence over KubeAPIToken + KubeAPITokenFile string `json:"kube-api-token-file,omitempty"` // KubeCAData is the Base64-ed Kubernetes API CA certificate data (not required if kubeconfig is given) KubeCAData string `json:"kube-ca-data,omitempty"` } diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go 
index 3706ab3d7e..1e5dbb2183 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -283,6 +283,7 @@ type KubernetesConfig struct { CAData []byte APIServer string `gcfg:"apiserver"` Token string `gcfg:"token"` + TokenFile string `gcfg:"tokenFile"` CompatServiceCIDR string `gcfg:"service-cidr"` RawServiceCIDRs string `gcfg:"service-cidrs"` ServiceCIDRs []*net.IPNet @@ -508,6 +509,7 @@ func PrepareTestConfig() error { os.Unsetenv("K8S_CACERT") os.Unsetenv("K8S_APISERVER") os.Unsetenv("K8S_TOKEN") + os.Unsetenv("K8S_TOKEN_FILE") return nil } @@ -875,6 +877,11 @@ var K8sFlags = []cli.Flag{ Usage: "the Kubernetes API authentication token (not required if --k8s-kubeconfig is given)", Destination: &cliConfig.Kubernetes.Token, }, + &cli.StringFlag{ + Name: "k8s-token-file", + Usage: "the path to Kubernetes API token. If set, it is periodically read and takes precedence over k8s-token", + Destination: &cliConfig.Kubernetes.TokenFile, + }, &cli.StringFlag{ Name: "ovn-config-namespace", Usage: "specify a namespace which will contain services to config the OVN databases", @@ -1199,6 +1206,7 @@ type Defaults struct { OvnNorthAddress bool K8sAPIServer bool K8sToken bool + K8sTokenFile bool K8sCert bool } @@ -1263,6 +1271,7 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin saConfig := savedKubernetes if data, err := ioutil.ReadFile(filepath.Join(saPath, kubeServiceAccountFileToken)); err == nil { saConfig.Token = string(data) + saConfig.TokenFile = filepath.Join(saPath, kubeServiceAccountFileToken) } if _, err2 := os.Stat(filepath.Join(saPath, kubeServiceAccountFileCACert)); err2 == nil { saConfig.CACert = filepath.Join(saPath, kubeServiceAccountFileCACert) @@ -1282,6 +1291,7 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin "CACert": "K8S_CACERT", "APIServer": "K8S_APISERVER", "Token": "K8S_TOKEN", + "TokenFile": "K8S_TOKEN_FILE", "HostNetworkNamespace": "OVN_HOST_NETWORK_NAMESPACE", } for k, v := range envVarsMap { @@ -1311,6 +1321,10 @@ func buildKubernetesConfig(exec kexec.Interface, cli, file *config, saPath strin if defaults.K8sToken { Kubernetes.Token = getOVSExternalID(exec, "k8s-api-token") } + if defaults.K8sTokenFile { + Kubernetes.TokenFile = getOVSExternalID(exec, "k8s-api-token-file") + } + if defaults.K8sCert { Kubernetes.CACert = getOVSExternalID(exec, "k8s-ca-certificate") } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 67e14be8ad..02b4d0ff03 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -144,6 +144,7 @@ lflow-cache-limit-kb=100000 kubeconfig=/path/to/kubeconfig apiserver=https://1.2.3.4:6443 token=TG9yZW0gaXBzdW0gZ +tokenFile=/path/to/token cacert=/path/to/kubeca.crt service-cidrs=172.18.0.0/24 no-hostsubnet-nodes=label=another-test-label @@ -267,6 +268,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal("")) gomega.Expect(Kubernetes.CAData).To(gomega.Equal([]byte{})) gomega.Expect(Kubernetes.Token).To(gomega.Equal("")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("")) gomega.Expect(Kubernetes.APIServer).To(gomega.Equal(DefaultAPIServer)) gomega.Expect(Kubernetes.RawServiceCIDRs).To(gomega.Equal("172.16.1.0/24")) gomega.Expect(Kubernetes.RawNoHostSubnetNodes).To(gomega.Equal("")) @@ -309,6 +311,11 @@ var _ = Describe("Config Operations", func() { Cmd: "ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . 
external_ids:k8s-api-token", Output: "asadfasdfasrw3atr3r3rf33fasdaa3233", }) + // k8s-api-token-file + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . external_ids:k8s-api-token-file", + Output: "/new/path/to/token", + }) // k8s-ca-certificate fname, fdata, err := createTempFile("ca.crt") gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -326,6 +333,7 @@ var _ = Describe("Config Operations", func() { OvnNorthAddress: true, K8sAPIServer: true, K8sToken: true, + K8sTokenFile: true, K8sCert: true, }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -336,7 +344,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal(fname)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(fdata)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("asadfasdfasrw3atr3r3rf33fasdaa3233")) - + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/new/path/to/token")) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeTCP)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("")) gomega.Expect(OvnNorth.Cert).To(gomega.Equal("")) @@ -372,6 +380,11 @@ var _ = Describe("Config Operations", func() { Cmd: "ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . external_ids:k8s-api-token", Output: "asadfasdfasrw3atr3r3rf33fasdaa3233", }) + // k8s-api-token-file + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . external_ids:k8s-api-token-file", + Output: "/new/path/to/token", + }) // k8s-ca-certificate fname, fdata, err := createTempFile("kube-cacert.pem") gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -393,6 +406,7 @@ var _ = Describe("Config Operations", func() { OvnNorthAddress: true, K8sAPIServer: true, K8sToken: true, + K8sTokenFile: true, K8sCert: true, }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -403,6 +417,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal(fname)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(fdata)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("asadfasdfasrw3atr3r3rf33fasdaa3233")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/new/path/to/token")) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeTCP)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("")) @@ -441,7 +456,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal(caFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(caData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("TG9yZW0gaXBzdW0gZ")) - + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal(tokenFile)) return nil } err2 := app.Run([]string{app.Name}) @@ -459,6 +474,9 @@ var _ = Describe("Config Operations", func() { os.Setenv("K8S_TOKEN", "this is the token test") defer os.Setenv("K8S_TOKEN", "") + os.Setenv("K8S_TOKEN_FILE", "/new/path/to/token") + defer os.Setenv("K8S_TOKEN_FILE", "") + os.Setenv("K8S_APISERVER", "https://9.2.3.4:6443") defer os.Setenv("K8S_APISERVER", "") @@ -478,6 +496,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal(kubeCAFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(kubeCAData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("this is the token test")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/new/path/to/token")) gomega.Expect(Kubernetes.APIServer).To(gomega.Equal("https://9.2.3.4:6443")) return nil @@ -525,6 +544,7 @@ var _ = Describe("Config Operations", func() { 
gomega.Expect(Kubernetes.CACert).To(gomega.Equal(kubeCAFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(kubeCAData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("TG9yZW0gaXBzdW0gZ")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/path/to/token")) gomega.Expect(Kubernetes.APIServer).To(gomega.Equal("https://1.2.3.4:6443")) gomega.Expect(Kubernetes.RawServiceCIDRs).To(gomega.Equal("172.18.0.0/24")) gomega.Expect(Default.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ @@ -597,6 +617,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Kubernetes.CACert).To(gomega.Equal(kubeCAFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(kubeCAData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("asdfasdfasdfasfd")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/new/path/to/token")) gomega.Expect(Kubernetes.APIServer).To(gomega.Equal("https://4.4.3.2:8080")) gomega.Expect(Kubernetes.RawServiceCIDRs).To(gomega.Equal("172.15.0.0/24")) gomega.Expect(Kubernetes.RawNoHostSubnetNodes).To(gomega.Equal("test=pass")) @@ -649,6 +670,7 @@ var _ = Describe("Config Operations", func() { "-k8s-apiserver=https://4.4.3.2:8080", "-k8s-cacert=" + kubeCAFile, "-k8s-token=asdfasdfasdfasfd", + "-k8s-token-file=/new/path/to/token", "-k8s-service-cidrs=172.15.0.0/24", "-nb-address=ssl:6.5.4.3:6651", "-no-hostsubnet-nodes=test=pass", @@ -927,6 +949,7 @@ mode=shared gomega.Expect(Kubernetes.CACert).To(gomega.Equal(kubeCAFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(kubeCAData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("asdfasdfasdfasfd")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/new/path/to/token")) gomega.Expect(Kubernetes.APIServer).To(gomega.Equal("https://4.4.3.2:8080")) gomega.Expect(Kubernetes.RawNoHostSubnetNodes).To(gomega.Equal("label=another-test-label")) gomega.Expect(Kubernetes.RawServiceCIDRs).To(gomega.Equal("172.15.0.0/24")) @@ -970,6 +993,7 @@ mode=shared "-k8s-apiserver=https://4.4.3.2:8080", "-k8s-cacert=" + kubeCAFile, "-k8s-token=asdfasdfasdfasfd", + "-k8s-token-file=/new/path/to/token", "-k8s-service-cidr=172.15.0.0/24", "-nb-address=ssl:6.5.4.3:6651,ssl:6.5.4.4:6651,ssl:6.5.4.5:6651", "-nb-client-privkey=/client/privkey", @@ -1021,6 +1045,7 @@ mode=shared gomega.Expect(Kubernetes.CACert).To(gomega.Equal(kubeCAFile)) gomega.Expect(Kubernetes.CAData).To(gomega.Equal(kubeCAData)) gomega.Expect(Kubernetes.Token).To(gomega.Equal("TG9yZW0gaXBzdW0gZ")) + gomega.Expect(Kubernetes.TokenFile).To(gomega.Equal("/path/to/token")) gomega.Expect(Kubernetes.RawServiceCIDRs).To(gomega.Equal("172.18.0.0/24")) return nil diff --git a/go-controller/pkg/libovsdbops/acl.go b/go-controller/pkg/libovsdbops/acl.go index 749c174647..b2d0161446 100644 --- a/go-controller/pkg/libovsdbops/acl.go +++ b/go-controller/pkg/libovsdbops/acl.go @@ -124,7 +124,7 @@ func ensureACLUUID(acl *nbdb.ACL) { } } -func BuildACL(name string, direction nbdb.ACLDirection, priority int, match string, action nbdb.ACLAction, meter string, severity nbdb.ACLSeverity, log bool, externalIds map[string]string) *nbdb.ACL { +func BuildACL(name string, direction nbdb.ACLDirection, priority int, match string, action nbdb.ACLAction, meter string, severity nbdb.ACLSeverity, log bool, externalIds map[string]string, options map[string]string) *nbdb.ACL { name = fmt.Sprintf("%.63s", name) var realName *string @@ -149,9 +149,7 @@ func BuildACL(name string, direction nbdb.ACLDirection, priority int, match stri Log: log, Meter: realMeter, ExternalIDs: externalIds, - } - if direction == 
nbdb.ACLDirectionFromLport { - acl.Options = map[string]string{"apply-after-lb": "true"} + Options: options, } return acl diff --git a/go-controller/pkg/libovsdbops/loadbalancer.go b/go-controller/pkg/libovsdbops/loadbalancer.go index a8ced83170..68a6b3d750 100644 --- a/go-controller/pkg/libovsdbops/loadbalancer.go +++ b/go-controller/pkg/libovsdbops/loadbalancer.go @@ -3,6 +3,7 @@ package libovsdbops import ( "context" "fmt" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" libovsdbclient "github.com/ovn-org/libovsdb/client" @@ -106,15 +107,16 @@ func createOrUpdateLoadBalancerOps(nbClient libovsdbclient.Client, ops []libovsd // If LoadBalancer does not exist, create it if err == libovsdbclient.ErrNotFound { timeout := types.OVSDBWaitTimeout - ops = append(ops, libovsdb.Operation{ - Op: libovsdb.OperationWait, - Timeout: &timeout, - Table: "Load_Balancer", - Where: []libovsdb.Condition{{Column: "name", Function: libovsdb.ConditionEqual, Value: lb.Name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []libovsdb.Row{{"name": lb.Name}}, - }) + condition := model.Condition{ + Field: &lb.Name, + Function: libovsdb.ConditionEqual, + Value: lb.Name, + } + waitOps, err := nbClient.Where(lb, condition).Wait(libovsdb.WaitConditionNotEqual, &timeout, lb, &lb.Name) + if err != nil { + return nil, err + } + ops = append(ops, waitOps...) ensureLoadBalancerUUID(lb) op, err := nbClient.Create(lb) diff --git a/go-controller/pkg/libovsdbops/model_client.go b/go-controller/pkg/libovsdbops/model_client.go index 1bea0da292..617f6a6c78 100644 --- a/go-controller/pkg/libovsdbops/model_client.go +++ b/go-controller/pkg/libovsdbops/model_client.go @@ -4,9 +4,10 @@ import ( "context" "errors" "fmt" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "reflect" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/model" "github.com/ovn-org/libovsdb/ovsdb" @@ -118,7 +119,7 @@ type OperationModel struct { // Name indicates that during a Create the model will have a predicate // operation to ensure a duplicate txn will not occur. See: // https://bugzilla.redhat.com/show_bug.cgi?id=2042001 - Name string + Name interface{} } // WithClient is useful for ad-hoc override of the targetted OVS DB. Can be used, @@ -295,31 +296,24 @@ func (m *ModelClient) create(opModel *OperationModel) ([]ovsdb.Operation, error) // ACL we would have to use external_ids + name for unique match // However external_ids would be a performance hit, and in one case we use // an empty name and external_ids for addAllowACLFromNode - if len(opModel.Name) > 0 && o[0].Table != "ACL" { + if opModel.Name != nil && o[0].Table != "ACL" { timeout := types.OVSDBWaitTimeout - ops = append(ops, ovsdb.Operation{ - Op: ovsdb.OperationWait, - Timeout: &timeout, - Table: o[0].Table, - Where: []ovsdb.Condition{{Column: "name", Function: ovsdb.ConditionEqual, Value: opModel.Name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []ovsdb.Row{{"name": opModel.Name}}, - }) + condition := model.Condition{ + Field: opModel.Name, + Function: ovsdb.ConditionEqual, + Value: getString(opModel.Name), + } + waitOps, err := m.client.Where(opModel.Model, condition).Wait(ovsdb.WaitConditionNotEqual, &timeout, opModel.Model, opModel.Name) + if err != nil { + return nil, err + } + ops = append(ops, waitOps...) 
} else if info, err := m.client.Cache().DatabaseModel().NewModelInfo(opModel.Model); err == nil { if name, err := info.FieldByColumn("name"); err == nil { - objName, ok := name.(string) - if !ok { - if strPtr, ok := name.(*string); ok { - if strPtr != nil { - objName = *strPtr - } - } - } + objName := getString(name) if len(objName) > 0 { klog.Warningf("OVSDB Create operation detected without setting opModel Name. Name: %s, %#v", objName, info) - } } } @@ -411,3 +405,15 @@ func addToExistingResult(model interface{}, existingResult interface{}) { resultVal := reflect.Indirect(resultPtr) resultVal.Set(reflect.Append(resultVal, reflect.Indirect(reflect.ValueOf(model)))) } + +func getString(field interface{}) string { + objName, ok := field.(string) + if !ok { + if strPtr, ok := field.(*string); ok { + if strPtr != nil { + objName = *strPtr + } + } + } + return objName +} diff --git a/go-controller/pkg/libovsdbops/portgroup.go b/go-controller/pkg/libovsdbops/portgroup.go index caa16070a9..4afbd45d76 100644 --- a/go-controller/pkg/libovsdbops/portgroup.go +++ b/go-controller/pkg/libovsdbops/portgroup.go @@ -2,6 +2,7 @@ package libovsdbops import ( "context" + libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/model" libovsdb "github.com/ovn-org/libovsdb/ovsdb" @@ -66,15 +67,16 @@ func createOrUpdatePortGroupOps(nbClient libovsdbclient.Client, ops []libovsdb.O if err == libovsdbclient.ErrNotFound { timeout := types.OVSDBWaitTimeout - ops = append(ops, libovsdb.Operation{ - Op: libovsdb.OperationWait, - Timeout: &timeout, - Table: "Port_Group", - Where: []libovsdb.Condition{{Column: "name", Function: libovsdb.ConditionEqual, Value: pg.Name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []libovsdb.Row{{"name": pg.Name}}, - }) + condition := model.Condition{ + Field: &pg.Name, + Function: libovsdb.ConditionEqual, + Value: pg.Name, + } + waitOps, err := nbClient.Where(pg, condition).Wait(libovsdb.WaitConditionNotEqual, &timeout, pg, &pg.Name) + if err != nil { + return nil, err + } + ops = append(ops, waitOps...) op, err := nbClient.Create(pg) if err != nil { return nil, err diff --git a/go-controller/pkg/libovsdbops/switch.go b/go-controller/pkg/libovsdbops/switch.go index eaf7ec06bb..1aadf00916 100644 --- a/go-controller/pkg/libovsdbops/switch.go +++ b/go-controller/pkg/libovsdbops/switch.go @@ -320,7 +320,7 @@ func AddACLToNodeSwitch(nbClient libovsdbclient.Client, nodeName string, nodeACL }, }, { - Name: nodeSwitch.Name, + Name: &nodeSwitch.Name, Model: &nodeSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == nodeName }, OnModelMutations: []interface{}{ diff --git a/go-controller/pkg/metrics/ovn_db.go b/go-controller/pkg/metrics/ovn_db.go index 2638fcb998..b37fb00e57 100644 --- a/go-controller/pkg/metrics/ovn_db.go +++ b/go-controller/pkg/metrics/ovn_db.go @@ -246,12 +246,14 @@ func ovnDBSizeMetricsUpdater(basePath, direction, database string) { if size, err := getOvnDBSizeViaPath(basePath, direction, database); err != nil { klog.Errorf("Failed to update OVN DB size metric: %v", err) } else { - // To update not only values but also labels for metrics, we use Reset() to delete previous labels+value - metricDBSize.Reset() metricDBSize.WithLabelValues(database).Set(float64(size)) } } +func resetOvnDbSizeMetric() { + metricDBSize.Reset() +} + // isOvnDBFoundViaPath attempts to find the OVN DBs, return false if not found. 
func isOvnDBFoundViaPath(basePath string, dirDbMap map[string]string) bool { enabled := true @@ -293,8 +295,6 @@ func ovnDBMemoryMetricsUpdater(direction, database string) { // kvPair will be of the form monitors:2 fields := strings.Split(kvPair, ":") if value, err := strconv.ParseFloat(fields[1], 64); err == nil { - // To update not only values but also labels for metrics, we use Reset() to delete previous labels+value - metricOVNDBMonitor.Reset() metricOVNDBMonitor.WithLabelValues(database).Set(value) } else { klog.Errorf("Failed to parse the monitor's value %s to float64: err(%v)", @@ -304,7 +304,6 @@ func ovnDBMemoryMetricsUpdater(direction, database string) { // kvPair will be of the form sessions:2 fields := strings.Split(kvPair, ":") if value, err := strconv.ParseFloat(fields[1], 64); err == nil { - metricOVNDBSessions.Reset() metricOVNDBSessions.WithLabelValues(database).Set(value) } else { klog.Errorf("Failed to parse the sessions' value %s to float64: err(%v)", @@ -314,6 +313,11 @@ func ovnDBMemoryMetricsUpdater(direction, database string) { } } +func resetOvnDbMemoryMetrics() { + metricOVNDBMonitor.Reset() + metricOVNDBSessions.Reset() +} + var ( ovnDbVersion string nbDbSchemaVersion string @@ -420,6 +424,14 @@ func RegisterOvnDBMetrics(clientset kubernetes.Interface, k8sNodeName string) { ovnDBMemoryMetricsUpdater(direction, database) } time.Sleep(30 * time.Second) + // To update not only values but also labels for metrics, we use Reset() to delete previous labels+value + if dbIsClustered { + resetOvnDbClusterMetrics() + } + if dbFoundViaPath { + resetOvnDbSizeMetric() + } + resetOvnDbMemoryMetrics() } }() } @@ -537,47 +549,49 @@ func ovnDBClusterStatusMetricsUpdater(direction, database string) { klog.Errorf(err.Error()) return } - // To update not only values but also labels for metrics, we use Reset() to delete previous labels+value - metricDBClusterCID.Reset() metricDBClusterCID.WithLabelValues(database, clusterStatus.cid).Set(1) - metricDBClusterSID.Reset() metricDBClusterSID.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(1) - metricDBClusterServerStatus.Reset() metricDBClusterServerStatus.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid, clusterStatus.status).Set(1) - metricDBClusterTerm.Reset() metricDBClusterTerm.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.term) - metricDBClusterServerRole.Reset() metricDBClusterServerRole.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid, clusterStatus.role).Set(1) - metricDBClusterServerVote.Reset() metricDBClusterServerVote.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid, clusterStatus.vote).Set(1) - metricDBClusterElectionTimer.Reset() metricDBClusterElectionTimer.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.electionTimer) - metricDBClusterLogIndexStart.Reset() metricDBClusterLogIndexStart.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.logIndexStart) - metricDBClusterLogIndexNext.Reset() metricDBClusterLogIndexNext.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.logIndexNext) - metricDBClusterLogNotCommitted.Reset() metricDBClusterLogNotCommitted.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.logNotCommitted) - metricDBClusterLogNotApplied.Reset() metricDBClusterLogNotApplied.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.logNotApplied) - 
metricDBClusterConnIn.Reset() metricDBClusterConnIn.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.connIn) - metricDBClusterConnOut.Reset() metricDBClusterConnOut.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.connOut) - metricDBClusterConnInErr.Reset() metricDBClusterConnInErr.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.connInErr) - metricDBClusterConnOutErr.Reset() metricDBClusterConnOutErr.WithLabelValues(database, clusterStatus.cid, clusterStatus.sid).Set(clusterStatus.connOutErr) } + +func resetOvnDbClusterMetrics() { + metricDBClusterCID.Reset() + metricDBClusterSID.Reset() + metricDBClusterServerStatus.Reset() + metricDBClusterTerm.Reset() + metricDBClusterServerRole.Reset() + metricDBClusterServerVote.Reset() + metricDBClusterElectionTimer.Reset() + metricDBClusterLogIndexStart.Reset() + metricDBClusterLogIndexNext.Reset() + metricDBClusterLogNotCommitted.Reset() + metricDBClusterLogNotApplied.Reset() + metricDBClusterConnIn.Reset() + metricDBClusterConnOut.Reset() + metricDBClusterConnInErr.Reset() + metricDBClusterConnOutErr.Reset() +} diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 9f4847c322..2ad05e98ad 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -440,18 +440,12 @@ var _ = Describe("Node Operations", func() { }, "filter": {}, } - expectedFlows := []string{ - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=eth0, tcp, tp_dst=31111, actions=ct(commit,zone=64003,table=6)", - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=LOCAL, tcp, tp_src=31111, actions=ct(zone=64003,table=7)", - "cookie=0xe745ecf105, priority=110, table=6, actions=output:LOCAL", - "cookie=0xe745ecf105, priority=110, table=7, actions=output:eth0", - } f4 := iptV4.(*util.FakeIPTables) err := f4.MatchState(expectedTables) Expect(err).NotTo(HaveOccurred()) flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] - Expect(flows).To(Equal(expectedFlows)) + Expect(flows).To(BeNil()) fakeOvnNode.shutdown() return nil } @@ -597,30 +591,103 @@ var _ = Describe("Node Operations", func() { }, "filter": {}, } - expectedNodePortFlows := []string{ - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=eth0, tcp, tp_dst=31111, actions=ct(commit,zone=64003,table=6)", - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=LOCAL, tcp, tp_src=31111, actions=ct(zone=64003,table=7)", - "cookie=0xe745ecf105, priority=110, table=6, actions=output:LOCAL", - "cookie=0xe745ecf105, priority=110, table=7, actions=output:eth0", + expectedLBIngressFlows := []string{ + "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=5.5.5.5, actions=output:LOCAL", + } + expectedLBExternalIPFlows := []string{ + "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=1.1.1.1, actions=output:LOCAL", } + + f4 := iptV4.(*util.FakeIPTables) + err := f4.MatchState(expectedTables) + Expect(err).NotTo(HaveOccurred()) + flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + Expect(flows).To(BeNil()) + flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + Expect(flows).To(Equal(expectedLBIngressFlows)) + flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + Expect(flows).To(Equal(expectedLBExternalIPFlows)) + + fakeOvnNode.shutdown() + return nil + } + err := 
app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + + It("inits iptables rules and openflows with LoadBalancer where ETP=cluster, LGW mode", func() { + app.Action = func(ctx *cli.Context) error { + externalIP := "1.1.1.1" + config.Gateway.Mode = config.GatewayModeLocal + service := *newService("service1", "namespace1", "10.129.0.2", + []v1.ServicePort{ + { + NodePort: int32(31111), + Protocol: v1.ProtocolTCP, + Port: int32(8080), + }, + }, + v1.ServiceTypeLoadBalancer, + []string{externalIP}, + v1.ServiceStatus{ + LoadBalancer: v1.LoadBalancerStatus{ + Ingress: []v1.LoadBalancerIngress{{ + IP: "5.5.5.5", + }}, + }, + }, + false, // ETP=cluster + ) + // endpoints.Subset is empty and yet this will come under !hasLocalHostNetEp case + endpoints := *newEndpoints("service1", "namespace1", []v1.EndpointSubset{}) + + fakeOvnNode.start(ctx, + &v1.ServiceList{ + Items: []v1.Service{ + service, + }, + }, + &endpoints, + ) + + fNPW.watchFactory = fakeOvnNode.watcher + startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig) + fNPW.AddService(&service) + + expectedTables := map[string]util.FakeTable{ + "nat": { + "PREROUTING": []string{ + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + }, + "OUTPUT": []string{ + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + }, + "OVN-KUBE-NODEPORT": []string{ + fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Spec.Ports[0].NodePort, service.Spec.ClusterIP, service.Spec.Ports[0].Port), + }, + "OVN-KUBE-SNAT-MGMTPORT": []string{}, + "OVN-KUBE-EXTERNALIP": []string{ + fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, service.Status.LoadBalancer.Ingress[0].IP, service.Spec.Ports[0].Port, service.Spec.ClusterIP, service.Spec.Ports[0].Port), + fmt.Sprintf("-p %s -d %s --dport %v -j DNAT --to-destination %s:%v", service.Spec.Ports[0].Protocol, externalIP, service.Spec.Ports[0].Port, service.Spec.ClusterIP, service.Spec.Ports[0].Port), + }, + }, + "filter": {}, + } + expectedLBIngressFlows := []string{ - "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, tcp, nw_dst=5.5.5.5, tp_dst=8080, actions=ct(commit,zone=64003,table=6)", - "cookie=0x10c6b89e483ea111, priority=110, in_port=LOCAL, tcp, nw_src=5.5.5.5, tp_src=8080, actions=ct(zone=64003,table=7)", - "cookie=0xe745ecf105, priority=110, table=6, actions=output:LOCAL", - "cookie=0xe745ecf105, priority=110, table=7, actions=output:eth0", + "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=5.5.5.5, actions=output:LOCAL", } expectedLBExternalIPFlows := []string{ - "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, tcp, nw_dst=1.1.1.1, tp_dst=8080, actions=ct(commit,zone=64003,table=6)", - "cookie=0x71765945a31dc2f1, priority=110, in_port=LOCAL, tcp, nw_src=1.1.1.1, tp_src=8080, actions=ct(zone=64003,table=7)", - "cookie=0xe745ecf105, priority=110, table=6, actions=output:LOCAL", - "cookie=0xe745ecf105, priority=110, table=7, actions=output:eth0", + "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=1.1.1.1, actions=output:LOCAL", } f4 := iptV4.(*util.FakeIPTables) err := f4.MatchState(expectedTables) Expect(err).NotTo(HaveOccurred()) flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] - Expect(flows).To(Equal(expectedNodePortFlows)) + Expect(flows).To(BeNil()) flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] 
Expect(flows).To(Equal(expectedLBIngressFlows)) flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] @@ -708,14 +775,14 @@ var _ = Describe("Node Operations", func() { "cookie=0x453ae29bcbbc08bd, priority=110, in_port=patch-breth0_ov, tcp, tp_src=31111, actions=output:eth0", } expectedLBIngressFlows := []string{ + "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=5.5.5.5, actions=output:LOCAL", "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, tcp, nw_dst=5.5.5.5, tp_dst=8080, actions=check_pkt_larger(1414)->reg0[0],resubmit(,11)", "cookie=0x10c6b89e483ea111, priority=110, in_port=patch-breth0_ov, tcp, nw_src=5.5.5.5, tp_src=8080, actions=output:eth0", - "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=5.5.5.5, actions=output:LOCAL", } expectedLBExternalIPFlows := []string{ + "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=1.1.1.1, actions=output:LOCAL", "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, tcp, nw_dst=1.1.1.1, tp_dst=8080, actions=check_pkt_larger(1414)->reg0[0],resubmit(,11)", "cookie=0x71765945a31dc2f1, priority=110, in_port=patch-breth0_ov, tcp, nw_src=1.1.1.1, tp_src=8080, actions=output:eth0", - "cookie=0x71765945a31dc2f1, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=1.1.1.1, actions=output:LOCAL", } f4 := iptV4.(*util.FakeIPTables) @@ -1035,10 +1102,10 @@ var _ = Describe("Node Operations", func() { It("manages iptables rules with ExternalIP", func() { app.Action = func(ctx *cli.Context) error { externalIP := "10.10.10.1" - externalIPPort := int32(8034) fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-ofctl show ", }) + externalIPPort := int32(8034) service := *newService("service1", "namespace1", "10.129.0.2", []v1.ServicePort{ { @@ -1265,18 +1332,12 @@ var _ = Describe("Node Operations", func() { }, "filter": {}, } - expectedFlows := []string{ - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=eth0, tcp, tp_dst=31111, actions=ct(commit,zone=64003,table=6)", - "cookie=0x453ae29bcbbc08bd, priority=110, in_port=LOCAL, tcp, tp_src=31111, actions=ct(zone=64003,table=7)", - "cookie=0xe745ecf105, priority=110, table=6, actions=output:LOCAL", - "cookie=0xe745ecf105, priority=110, table=7, actions=output:eth0", - } f4 := iptV4.(*util.FakeIPTables) err := f4.MatchState(expectedTables) Expect(err).NotTo(HaveOccurred()) flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] - Expect(flows).To(Equal(expectedFlows)) + Expect(flows).To(BeNil()) fNPW.DeleteService(&service) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index e8d2250ce5..2cdc3afd73 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -76,54 +76,18 @@ type serviceConfig struct { hasLocalHostNetworkEp bool } -// createFlowsForNonLocalHostNetEp adds the necessary openflows for a service when its backed by non-local-host-networked endpoints -// This function is currently used only in LGW mode and when ETP=local for the service -// cookie is the hash value of openflow cookie, svcPort is the nodePort of the service, flowProtocol is tcp/udp/sctp in v4 or v6 modes, -// externalIPOrLBIngressIP is either externalIP or LB.status.ingress, nwDst&nwSrc are constants used to match on the destinations based on the flowprotocol -func (npw *nodePortWatcher) createFlowsForNonLocalHostNetEp(cookie string, svcPort int32, flowProtocol string, 
externalIPOrLBIngressIP, nwDst, nwSrc string) []string { - var nodeportFlowsResult []string - if len(externalIPOrLBIngressIP) != 0 { - // if externalIPOrLBIngressIP is set then this service is of type LB/EIP - nodeportFlowsResult = append(nodeportFlowsResult, - fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, tp_dst=%d, actions=ct(commit,zone=%d,table=6)", - cookie, npw.ofportPhys, flowProtocol, nwDst, externalIPOrLBIngressIP, svcPort, HostNodePortCTZone), - fmt.Sprintf("cookie=%s, priority=110, in_port=LOCAL, %s, %s=%s, tp_src=%d, actions=ct(zone=%d,table=7)", - cookie, flowProtocol, nwSrc, externalIPOrLBIngressIP, svcPort, HostNodePortCTZone)) - } else { - // if externalIPOrLBIngressIP is not set then this service is of type NP - nodeportFlowsResult = append(nodeportFlowsResult, - // Traffic destined for ETP=local backed by OVN-K pod in LGW mode is matched on the svcNP and sent to table6. - fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, tp_dst=%d, actions=ct(commit,zone=%d,table=6)", - cookie, npw.ofportPhys, flowProtocol, svcPort, HostNodePortCTZone), - // Return traffic is matched on the svcNP as the sourcePort and gets un-DNATed back to nodeIP. - fmt.Sprintf("cookie=%s, priority=110, in_port=LOCAL, %s, tp_src=%d, actions=ct(zone=%d,table=7)", - cookie, flowProtocol, svcPort, HostNodePortCTZone)) - } - nodeportFlowsResult = append(nodeportFlowsResult, - // table 6, Sends the packet to Host. Note that the constant etp svc cookie is used since this flow would be - // same for all such services. - fmt.Sprintf("cookie=%s, priority=110, table=6, actions=output:LOCAL", - etpSvcOpenFlowCookie), - // table 7, Sends the reply packet back out eth0 to the external client. Note that the constant etp svc - // cookie is used since this would be same for all such services. - fmt.Sprintf("cookie=%s, priority=110, table=7, actions=output:%s", - etpSvcOpenFlowCookie, npw.ofportPhys)) - return nodeportFlowsResult -} - // updateServiceFlowCache handles managing breth0 gateway flows for ingress traffic towards kubernetes services // (nodeport, external, ingress). By default incoming traffic into the node is steered directly into OVN (case3 below). // // case1: If a service has externalTrafficPolicy=local, and has host-networked endpoints local to the node (hasLocalHostNetworkEp), // traffic instead will be steered directly into the host and DNAT-ed to the targetPort on the host. // -// case2: Only applicable for LGW mode: If a service has externalTrafficPolicy=local, and it doesn't have host-networked -// endpoints local to the node (!hasLocalHostNetworkEp - including empty endpoint.Subset), traffic will be steered into LOCAL -// preserving sourceIP and IPTables will steer this traffic into OVN via ovn-k8s-mp0. +// case2: All other types of services in SGW mode i.e: +// case2a: if externalTrafficPolicy=cluster + SGW mode, traffic will be steered into OVN via GR. +// case2b: if externalTrafficPolicy=local + !hasLocalHostNetworkEp + SGW mode, traffic will be steered into OVN via GR. +// +// NOTE: If LGW mode, the default flow will take care of sending traffic to host irrespective of service flow type. // -// case3: All other types of services i.e: -// case3a: if externalTrafficPolicy=cluster, irrespective of gateway modes, traffic will be steered into OVN via GR. -// case3b: if externalTrafficPolicy=local+!hasLocalHostNetworkEp+SGW mode, traffic will be steered into OVN via GR. 
// `add` parameter indicates if the flows should exist or be removed from the cache // `hasLocalHostNetworkEp` indicates if at least one host networked endpoint exists for this service which is local to this node. func (npw *nodePortWatcher) updateServiceFlowCache(service *kapi.Service, add, hasLocalHostNetworkEp bool) { @@ -167,7 +131,6 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *kapi.Service, add, h npw.ofm.deleteFlowsByKey(key) continue } - // (astoycos) TODO combine flow generation into a single function // This allows external traffic ingress when the svc's ExternalTrafficPolicy is // set to Local, and the backend pod is HostNetworked. We need to add // Flows that will DNAT all traffic coming into nodeport to the nodeIP:Port and @@ -176,8 +139,8 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *kapi.Service, add, h // case1 (see function description for details) var nodeportFlows []string klog.V(5).Infof("Adding flows on breth0 for Nodeport Service %s in Namespace: %s since ExternalTrafficPolicy=local", service.Name, service.Namespace) - // table 0, This rule matches on all traffic with dst port == NodePort, DNAT's it to the correct NodeIP - // If ipv6 make sure to choose the ipv6 node address), and sends to table 6 + // table 0, This rule matches on all traffic with dst port == NodePort, DNAT's the nodePort to the svc targetPort + // If ipv6 make sure to choose the ipv6 node address for rule if strings.Contains(flowProtocol, "6") { nodeportFlows = append(nodeportFlows, fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, tp_dst=%d, actions=ct(commit,zone=%d,nat(dst=[%s]:%s),table=6)", @@ -199,20 +162,15 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *kapi.Service, add, h // cookie is used since this would be same for all such services. fmt.Sprintf("cookie=%s, priority=110, table=7, "+ "actions=output:%s", etpSvcOpenFlowCookie, npw.ofportPhys)) - npw.ofm.updateFlowCacheEntry(key, nodeportFlows) - } else if isServiceTypeETPLocal && !hasLocalHostNetworkEp && config.Gateway.Mode == config.GatewayModeLocal { + } else if config.Gateway.Mode == config.GatewayModeShared { // case2 (see function description for details) - var nodeportFlows []string - klog.V(5).Infof("Adding flows on breth0 for Nodeport Service %s in Namespace: %s since ExternalTrafficPolicy=local", service.Name, service.Namespace) - nodeportFlows = npw.createFlowsForNonLocalHostNetEp(cookie, svcPort.NodePort, flowProtocol, "", "", "") - npw.ofm.updateFlowCacheEntry(key, nodeportFlows) - } else { - // case3 (see function description for details) npw.ofm.updateFlowCacheEntry(key, []string{ + // table=0, matches on service traffic towards nodePort and sends it to OVN pipeline fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, tp_dst=%d, "+ "actions=%s", cookie, npw.ofportPhys, flowProtocol, svcPort.NodePort, actions), + // table=0, matches on return traffic from service nodePort and sends it out to primary node interface (br-ex) fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, tp_src=%d, "+ "actions=output:%s", cookie, npw.ofportPatch, flowProtocol, svcPort.NodePort, npw.ofportPhys)}) @@ -247,13 +205,12 @@ func (npw *nodePortWatcher) updateServiceFlowCache(service *kapi.Service, add, h // case1: If a service has externalTrafficPolicy=local, and has host-networked endpoints local to the node (hasLocalHostNetworkEp), // traffic instead will be steered directly into the host and DNAT-ed to the targetPort on the host. 
 //
-// case2: Only applicable for LGW mode: If a service has externalTrafficPolicy=local, and it doesn't have host-networked
-// endpoints local to the node (!hasLocalHostNetworkEp - including empty endpoint.Subset), traffic will be steered into LOCAL
-// preserving sourceIP and IPTables will steer this traffic into OVN via ovn-k8s-mp0.
+// case2: All other types of services in SGW mode i.e:
+// case2a: if externalTrafficPolicy=cluster + SGW mode, traffic will be steered into OVN via GR.
+// case2b: if externalTrafficPolicy=local + !hasLocalHostNetworkEp + SGW mode, traffic will be steered into OVN via GR.
+//
+// NOTE: If LGW mode, the default flow will take care of sending traffic to host irrespective of service flow type.
 //
-// case3: All other types of services i.e:
-// case3a: if externalTrafficPolicy=cluster, irrespective of gateway modes, traffic will be steered into OVN via GR.
-// case3b: if externalTrafficPolicy=local+!hasLocalHostNetworkEp+SGW mode, traffic will be steered into OVN via GR.
 // `add` parameter indicates if the flows should exist or be removed from the cache
 // `hasLocalHostNetworkEp` indicates if at least one host networked endpoint exists for this service which is local to this node.
 // `protocol` is TCP/UDP/SCTP as set in the svc.Port
@@ -284,6 +241,9 @@ func (npw *nodePortWatcher) createLbAndExternalSvcFlows(service *kapi.Service, s
 		npw.ofm.deleteFlowsByKey(key)
 		return nil
 	}
+	// add the ARP bypass flow regardless of service type or gateway modes since it's applicable in all scenarios.
+	arpFlow := npw.generateArpBypassFlow(protocol, externalIPOrLBIngressIP, cookie)
+	externalIPFlows := []string{arpFlow}
 	// This allows external traffic ingress when the svc's ExternalTrafficPolicy is
 	// set to Local, and the backend pod is HostNetworked.
We need to add // Flows that will DNAT all external traffic destined for the lb/externalIP service @@ -293,49 +253,43 @@ func (npw *nodePortWatcher) createLbAndExternalSvcFlows(service *kapi.Service, s isServiceTypeETPLocal := util.ServiceExternalTrafficPolicyLocal(service) if isServiceTypeETPLocal && hasLocalHostNetworkEp { // case1 (see function description for details) - var nodeportFlows []string klog.V(5).Infof("Adding flows on breth0 for %s Service %s in Namespace: %s since ExternalTrafficPolicy=local", ipType, service.Name, service.Namespace) - // table 0, This rule matches on all traffic with dst ip == LoadbalancerIP / externalIP, DNAT's it to the correct NodeIP + // table 0, This rule matches on all traffic with dst ip == LoadbalancerIP / externalIP, DNAT's the nodePort to the svc targetPort // If ipv6 make sure to choose the ipv6 node address for rule if strings.Contains(flowProtocol, "6") { - nodeportFlows = append(nodeportFlows, + externalIPFlows = append(externalIPFlows, fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, tp_dst=%d, actions=ct(commit,zone=%d,nat(dst=[%s]:%s),table=6)", cookie, npw.ofportPhys, flowProtocol, nwDst, externalIPOrLBIngressIP, svcPort.Port, HostNodePortCTZone, npw.gatewayIPv6, svcPort.TargetPort.String())) } else { - nodeportFlows = append(nodeportFlows, + externalIPFlows = append(externalIPFlows, fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, tp_dst=%d, actions=ct(commit,zone=%d,nat(dst=%s:%s),table=6)", cookie, npw.ofportPhys, flowProtocol, nwDst, externalIPOrLBIngressIP, svcPort.Port, HostNodePortCTZone, npw.gatewayIPv4, svcPort.TargetPort.String())) } - nodeportFlows = append(nodeportFlows, - // table 6, Sends the packet to the host + externalIPFlows = append(externalIPFlows, + // table 6, Sends the packet to Host. Note that the constant etp svc cookie is used since this flow would be + // same for all such services. fmt.Sprintf("cookie=%s, priority=110, table=6, actions=output:LOCAL", etpSvcOpenFlowCookie), // table 0, Matches on return traffic, i.e traffic coming from the host networked pod's port, and unDNATs fmt.Sprintf("cookie=%s, priority=110, in_port=LOCAL, %s, tp_src=%s, actions=ct(commit,zone=%d nat,table=7)", cookie, flowProtocol, svcPort.TargetPort.String(), HostNodePortCTZone), - // table 7, the packet back out eth0 to the external client - fmt.Sprintf("cookie=%s, priority=110, table=7, "+ - "actions=output:%s", etpSvcOpenFlowCookie, npw.ofportPhys)) - - npw.ofm.updateFlowCacheEntry(key, nodeportFlows) - - } else if isServiceTypeETPLocal && !hasLocalHostNetworkEp && config.Gateway.Mode == config.GatewayModeLocal { + // table 7, Sends the reply packet back out eth0 to the external client. Note that the constant etp svc + // cookie is used since this would be same for all such services. 
+ fmt.Sprintf("cookie=%s, priority=110, table=7, actions=output:%s", + etpSvcOpenFlowCookie, npw.ofportPhys)) + } else if config.Gateway.Mode == config.GatewayModeShared { // case2 (see function description for details) - var nodeportFlows []string - klog.V(5).Infof("Adding flows on breth0 for %s Service %s in Namespace: %s since ExternalTrafficPolicy=local", ipType, service.Name, service.Namespace) - nodeportFlows = npw.createFlowsForNonLocalHostNetEp(cookie, svcPort.Port, flowProtocol, externalIPOrLBIngressIP, nwDst, nwSrc) - npw.ofm.updateFlowCacheEntry(key, nodeportFlows) - } else { - // case3 (see function description for details) - npw.ofm.updateFlowCacheEntry(key, []string{ + externalIPFlows = append(externalIPFlows, + // table=0, matches on service traffic towards externalIP or LB ingress and sends it to OVN pipeline fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, tp_dst=%d, "+ "actions=%s", cookie, npw.ofportPhys, flowProtocol, nwDst, externalIPOrLBIngressIP, svcPort.Port, actions), + // table=0, matches on return traffic from service externalIP or LB ingress and sends it out to primary node interface (br-ex) fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, tp_src=%d, "+ "actions=output:%s", - cookie, npw.ofportPatch, flowProtocol, nwSrc, externalIPOrLBIngressIP, svcPort.Port, npw.ofportPhys), - npw.generateArpBypassFlow(protocol, externalIPOrLBIngressIP, cookie)}) + cookie, npw.ofportPatch, flowProtocol, nwSrc, externalIPOrLBIngressIP, svcPort.Port, npw.ofportPhys)) } + npw.ofm.updateFlowCacheEntry(key, externalIPFlows) return nil } diff --git a/go-controller/pkg/ovn/address_set/address_set.go b/go-controller/pkg/ovn/address_set/address_set.go index 4296aa2919..8c6cabe1b9 100644 --- a/go-controller/pkg/ovn/address_set/address_set.go +++ b/go-controller/pkg/ovn/address_set/address_set.go @@ -114,15 +114,16 @@ func ensureOvnAddressSet(nbClient libovsdbclient.Client, name string) (*ovnAddre if len(addrSet.UUID) == 0 { ops := make([]ovsdb.Operation, 0, 2) timeout := types.OVSDBWaitTimeout - ops = append(ops, ovsdb.Operation{ - Op: ovsdb.OperationWait, - Timeout: &timeout, - Table: "Address_Set", - Where: []ovsdb.Condition{{Column: "name", Function: ovsdb.ConditionEqual, Value: addrSet.Name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []ovsdb.Row{{"name": addrSet.Name}}, - }) + condition := model.Condition{ + Field: &addrSet.Name, + Function: ovsdb.ConditionEqual, + Value: addrSet.Name, + } + waitOps, err := as.nbClient.Where(addrSet, condition).Wait(ovsdb.WaitConditionNotEqual, &timeout, addrSet, &addrSet.Name) + if err != nil { + return nil, err + } + ops = append(ops, waitOps...) 
// hack used to make TransactAndCheckAndSetUUIDs track the model correctly addrSet.UUID = libovsdbops.BuildNamedUUID() // create the address_set with no IPs @@ -365,15 +366,16 @@ func newOvnAddressSet(nbClient libovsdbclient.Client, name string, ips []net.IP) } else { ops := make([]ovsdb.Operation, 0, 2) timeout := types.OVSDBWaitTimeout - ops = append(ops, ovsdb.Operation{ - Op: ovsdb.OperationWait, - Timeout: &timeout, - Table: "Address_Set", - Where: []ovsdb.Condition{{Column: "name", Function: ovsdb.ConditionEqual, Value: name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []ovsdb.Row{{"name": name}}, - }) + condition := model.Condition{ + Field: &addrSet.Name, + Function: ovsdb.ConditionEqual, + Value: addrSet.Name, + } + waitOps, err := as.nbClient.Where(addrSet, condition).Wait(ovsdb.WaitConditionNotEqual, &timeout, addrSet, &addrSet.Name) + if err != nil { + return nil, err + } + ops = append(ops, waitOps...) if addrSet.UUID == "" { // hack used to make TransactAndCheckAndSetUUIDs track the model correctly addrSet.UUID = libovsdbops.BuildNamedUUID() diff --git a/go-controller/pkg/ovn/egressfirewall.go b/go-controller/pkg/ovn/egressfirewall.go index 18c177f22d..aa76c9297b 100644 --- a/go-controller/pkg/ovn/egressfirewall.go +++ b/go-controller/pkg/ovn/egressfirewall.go @@ -379,7 +379,7 @@ func (oc *Controller) createEgressFirewallRules(priority int, match, action, ext switches = append(switches, &lsw) opModels = append(opModels, []libovsdbops.OperationModel{ { - Name: lsw.Name, + Name: &lsw.Name, Model: &lsw, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == lsn }, OnModelMutations: []interface{}{ diff --git a/go-controller/pkg/ovn/egressfirewall_test.go b/go-controller/pkg/ovn/egressfirewall_test.go index b567a6785d..b655b1fd0c 100644 --- a/go-controller/pkg/ovn/egressfirewall_test.go +++ b/go-controller/pkg/ovn/egressfirewall_test.go @@ -88,6 +88,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "none"}, + nil, ) purgeACL.UUID = libovsdbops.BuildNamedUUID() @@ -101,6 +102,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "default"}, + nil, ) keepACL.UUID = libovsdbops.BuildNamedUUID() @@ -115,6 +117,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "default"}, + nil, ) otherACL.UUID = libovsdbops.BuildNamedUUID() @@ -248,6 +251,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() @@ -336,6 +340,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv6ACL.UUID = libovsdbops.BuildNamedUUID() @@ -432,6 +437,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) udpACL.UUID = libovsdbops.BuildNamedUUID() @@ -529,6 +535,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() @@ -633,6 +640,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for local gateway mode", "", false, map[string]string{"egressFirewall": 
"namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() @@ -712,6 +720,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "none"}, + nil, ) purgeACL.UUID = libovsdbops.BuildNamedUUID() @@ -725,6 +734,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "default"}, + nil, ) keepACL.UUID = libovsdbops.BuildNamedUUID() @@ -739,6 +749,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "default"}, + nil, ) otherACL.UUID = libovsdbops.BuildNamedUUID() @@ -867,6 +878,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() @@ -950,6 +962,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv6ACL.UUID = libovsdbops.BuildNamedUUID() @@ -1043,6 +1056,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) udpACL.UUID = libovsdbops.BuildNamedUUID() @@ -1124,6 +1138,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() @@ -1212,6 +1227,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations for shared gateway mode", "", false, map[string]string{"egressFirewall": "namespace1"}, + nil, ) ipv4ACL.UUID = libovsdbops.BuildNamedUUID() diff --git a/go-controller/pkg/ovn/egressgw.go b/go-controller/pkg/ovn/egressgw.go index 354ff318b9..1b287271af 100644 --- a/go-controller/pkg/ovn/egressgw.go +++ b/go-controller/pkg/ovn/egressgw.go @@ -222,23 +222,10 @@ func (oc *Controller) addGWRoutesForNamespace(namespace string, egress gatewayIn if err != nil { return fmt.Errorf("failed to get all the pods (%v)", err) } - // TODO (trozet): use the go bindings here and batch commands for _, pod := range existingPods { - podNsName := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} - if config.Gateway.DisableSNATMultipleGWs { - logicalPort := util.GetLogicalPortName(pod.Namespace, pod.Name) - portInfo, err := oc.logicalPortCache.get(logicalPort) - if err != nil { - klog.Warningf("Unable to get port %s in cache for SNAT rule removal", logicalPort) - } else { - // delete all perPodSNATs (if this pod was controlled by egressIP controller, it will stop working since - // a pod cannot be used for multiple-external-gateways and egressIPs at the same time) - if err = deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, []*net.IPNet{}, portInfo.ips); err != nil { - klog.Error(err.Error()) - } - } + if util.PodCompleted(pod) || !util.PodWantsNetwork(pod) { + continue } - podIPs := make([]*net.IPNet, 0) for _, podIP := range pod.Status.PodIPs { cidr := podIP.IP + GetIPFullMask(podIP.IP) @@ -248,6 +235,18 @@ func (oc *Controller) addGWRoutesForNamespace(namespace string, egress gatewayIn } podIPs = append(podIPs, ipNet) } + if len(podIPs) == 0 { + klog.Warningf("Will not add gateway routes pod %s/%s. 
IPs not found!", pod.Namespace, pod.Name) + continue + } + if config.Gateway.DisableSNATMultipleGWs { + // delete all perPodSNATs (if this pod was controlled by egressIP controller, it will stop working since + // a pod cannot be used for multiple-external-gateways and egressIPs at the same time) + if err = deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, []*net.IPNet{}, podIPs); err != nil { + klog.Error(err.Error()) + } + } + podNsName := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} if err := oc.addGWRoutesForPod([]*gatewayInfo{&egress}, podIPs, podNsName, pod.Spec.NodeName); err != nil { return err } @@ -296,7 +295,7 @@ func (oc *Controller) createBFDStaticRoute(bfdEnabled bool, gw net.IP, podIP, gr } }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gr @@ -364,7 +363,7 @@ func (oc *Controller) deletePodGWRoute(routeInfo *externalRouteInfo, podIP, gw, node := util.GetWorkerFromGatewayRouter(gr) if entry := routeInfo.podExternalRoutes[podIP]; len(entry) == 0 { if err := oc.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { - return err + return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.podName, err) } } @@ -377,23 +376,27 @@ func (oc *Controller) deletePodGWRoute(routeInfo *externalRouteInfo, podIP, gw, // deletePodExternalGW detects if a given pod is acting as an external GW and removes all routes in all namespaces // associated with that pod -func (oc *Controller) deletePodExternalGW(pod *kapi.Pod) { +func (oc *Controller) deletePodExternalGW(pod *kapi.Pod) (err error) { podRoutingNamespaceAnno := pod.Annotations[routingNamespaceAnnotation] if podRoutingNamespaceAnno == "" { - return + return nil } klog.Infof("Deleting routes for external gateway pod: %s, for namespace(s) %s", pod.Name, podRoutingNamespaceAnno) for _, namespace := range strings.Split(podRoutingNamespaceAnno, ",") { - oc.deletePodGWRoutesForNamespace(pod, namespace) + if err := oc.deletePodGWRoutesForNamespace(pod, namespace); err != nil { + // if we encounter error while deleting things in one namespace we return and don't try subsequent namespaces + return fmt.Errorf("failed to delete ecmp routes for pod %s in namespace %s", pod.Name, namespace) + } } + return nil } // deletePodGwRoutesForNamespace handles deleting all routes in a namespace for a specific pod GW -func (oc *Controller) deletePodGWRoutesForNamespace(pod *kapi.Pod, namespace string) { +func (oc *Controller) deletePodGWRoutesForNamespace(pod *kapi.Pod, namespace string) (err error) { nsInfo, nsUnlock := oc.getNamespaceLocked(namespace, false) if nsInfo == nil { - return + return nil } podGWKey := makePodGWKey(pod) // check if any gateways were stored for this pod @@ -404,20 +407,28 @@ func (oc *Controller) deletePodGWRoutesForNamespace(pod *kapi.Pod, namespace str if !ok || len(foundGws.gws) == 0 { klog.Infof("No gateways found to remove for annotated gateway pod: %s on namespace: %s", pod, namespace) - return + return nil } gws := sets.NewString() for _, gwIP := range foundGws.gws { gws.Insert(gwIP.String()) } - oc.deleteGWRoutesForNamespace(namespace, gws) + if err := oc.deleteGWRoutesForNamespace(namespace, gws); err != nil { + // add the entry back to nsInfo for retrying later + nsInfo, nsUnlock := oc.getNamespaceLocked(namespace, false) + // we add back all the gw routes as we don't know which specific route for which pod error-ed out + 
nsInfo.routingExternalPodGWs[podGWKey] = foundGws + nsUnlock() + return fmt.Errorf("failed to delete GW routes for pod %s: %w", pod.Name, err) + } + return nil } // deleteGwRoutesForNamespace handles deleting routes to gateways for a pod on a specific GR. // If a set of gateways is given, only routes for that gateway are deleted. If no gateways // are given, all routes for the namespace are deleted. -func (oc *Controller) deleteGWRoutesForNamespace(namespace string, matchGWs sets.String) { +func (oc *Controller) deleteGWRoutesForNamespace(namespace string, matchGWs sets.String) error { deleteAll := (matchGWs == nil || matchGWs.Len() == 0) for _, routeInfo := range oc.getRouteInfosForNamespace(namespace) { routeInfo.Lock() @@ -429,8 +440,9 @@ func (oc *Controller) deleteGWRoutesForNamespace(namespace string, matchGWs sets for gw, gr := range routes { if deleteAll || matchGWs.Has(gw) { if err := oc.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil { - klog.Errorf(err.Error()) - continue + // if we encounter error while deleting routes for one pod; we return and don't try subsequent pods + routeInfo.Unlock() + return fmt.Errorf("delete pod GW route failed: %w", err) } delete(routes, gw) } @@ -438,13 +450,14 @@ func (oc *Controller) deleteGWRoutesForNamespace(namespace string, matchGWs sets } routeInfo.Unlock() } + return nil } // deleteGwRoutesForPod handles deleting all routes to gateways for a pod IP on a specific GR -func (oc *Controller) deleteGWRoutesForPod(name ktypes.NamespacedName, podIPNets []*net.IPNet) { +func (oc *Controller) deleteGWRoutesForPod(name ktypes.NamespacedName, podIPNets []*net.IPNet) (err error) { routeInfo := oc.deleteRouteInfoLocked(name) if routeInfo == nil { - return + return nil } defer routeInfo.Unlock() @@ -460,12 +473,13 @@ func (oc *Controller) deleteGWRoutesForPod(name ktypes.NamespacedName, podIPNets } for gw, gr := range routes { if err := oc.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil { - klog.Errorf(err.Error()) - continue + // if we encounter error while deleting routes for one pod; we return and don't try subsequent pods + return fmt.Errorf("delete pod GW route failed: %w", err) } delete(routes, gw) } } + return nil } // addEgressGwRoutesForPod handles adding all routes to gateways for a pod on a specific GR @@ -683,7 +697,7 @@ func (oc *Controller) addHybridRoutePolicyForPod(podIP net.IP, node string) erro }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -1053,7 +1067,7 @@ func (oc *Controller) cleanExGwECMPRoutes() { ovnRoute, err) } else { if err := oc.cleanUpBFDEntry(ovnRoute.nextHop, ovnRoute.router, prefix); err != nil { - klog.Errorf(err.Error()) + klog.Errorf("Cannot clean up BFD entry: %w", err) } } diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 1d7229af1c..a56016fda4 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -277,6 +277,9 @@ func (oc *Controller) reconcileEgressIP(old, new *egressipv1.EgressIP) (err erro return err } } + if util.PodCompleted(pod) { + continue + } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { if err := oc.addPodEgressIPAssignments(name, newEIP.Status.Items, pod); err != nil { return err @@ -1308,6 +1311,10 @@ func (oc *Controller) generateCacheForEgressIP(eIPs []interface{}) (map[string]e continue } for _, pod := range pods 
{ + if util.PodCompleted(pod) { + continue + } + // FIXME(trozet): potential race where pod is not yet added in the cache by the pod handler logicalPort, err := oc.logicalPortCache.get(util.GetLogicalPortName(pod.Namespace, pod.Name)) if err != nil { klog.Errorf("Error getting logical port %s, err: %v", util.GetLogicalPortName(pod.Namespace, pod.Name), err) @@ -1944,7 +1951,7 @@ func (e *egressIPController) createEgressReroutePolicy(filterOption, egressIPNam }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -2279,7 +2286,7 @@ func (oc *Controller) createLogicalRouterPolicy(match string, priority int) erro }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ diff --git a/go-controller/pkg/ovn/gateway_init.go b/go-controller/pkg/ovn/gateway_init.go index 72a2794d65..ed4946ccfc 100644 --- a/go-controller/pkg/ovn/gateway_init.go +++ b/go-controller/pkg/ovn/gateway_init.go @@ -67,7 +67,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, opModels := []libovsdbops.OperationModel{ { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gatewayRouter }, OnModelUpdates: []interface{}{ @@ -128,7 +128,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == types.OVNJoinSwitch }, OnModelMutations: []interface{}{ @@ -165,7 +165,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gatewayRouter }, OnModelMutations: []interface{}{ @@ -217,7 +217,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gatewayRouter }, OnModelMutations: []interface{}{ @@ -287,7 +287,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gatewayRouter }, OnModelMutations: []interface{}{ @@ -329,7 +329,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -382,7 +382,7 @@ func (oc *Controller) gatewayInit(nodeName string, clusterIPSubnet []*net.IPNet, }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -506,7 +506,7 @@ func (oc *Controller) addExternalSwitch(prefix, interfaceID, nodeName, gatewayRo } opModels := 
[]libovsdbops.OperationModel{ { - Name: externalLogicalSwitch.Name, + Name: &externalLogicalSwitch.Name, Model: &externalLogicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == externalSwitch }, }, @@ -545,7 +545,7 @@ func (oc *Controller) addExternalSwitch(prefix, interfaceID, nodeName, gatewayRo }, }, { - Name: externalLogicalSwitch.Name, + Name: &externalLogicalSwitch.Name, Model: &externalLogicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == externalSwitch }, OnModelMutations: []interface{}{ @@ -589,7 +589,7 @@ func (oc *Controller) addExternalSwitch(prefix, interfaceID, nodeName, gatewayRo }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == gatewayRouter }, OnModelMutations: []interface{}{ @@ -626,7 +626,7 @@ func (oc *Controller) addExternalSwitch(prefix, interfaceID, nodeName, gatewayRo }, }, { - Name: externalLogicalSwitch.Name, + Name: &externalLogicalSwitch.Name, Model: &externalLogicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == externalSwitch }, OnModelMutations: []interface{}{ @@ -795,7 +795,7 @@ func (oc *Controller) createPolicyBasedRoutes(match, priority, nexthops string) }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ diff --git a/go-controller/pkg/ovn/gress_policy.go b/go-controller/pkg/ovn/gress_policy.go index e2bb63d8dc..9098b3ea1e 100644 --- a/go-controller/pkg/ovn/gress_policy.go +++ b/go-controller/pkg/ovn/gress_policy.go @@ -360,10 +360,12 @@ func (gp *gressPolicy) buildLocalPodACLs(portGroupName, aclLogging string) []*nb // buildACLAllow builds an allow-related ACL for a given given match func (gp *gressPolicy) buildACLAllow(match, l4Match string, ipBlockCIDR int, aclLogging string) *nbdb.ACL { var direction string + var options map[string]string if gp.policyType == knet.PolicyTypeIngress { direction = nbdb.ACLDirectionToLport } else { direction = nbdb.ACLDirectionFromLport + options = map[string]string{"apply-after-lb": "true"} } priority := types.DefaultAllowPriority action := nbdb.ACLActionAllowRelated @@ -394,7 +396,7 @@ func (gp *gressPolicy) buildACLAllow(match, l4Match string, ipBlockCIDR int, acl policyTypeNum: policyTypeIndex, } - acl := libovsdbops.BuildACL(aclName, direction, priority, match, action, types.OvnACLLoggingMeter, getACLLoggingSeverity(aclLogging), aclLogging != "", externalIds) + acl := libovsdbops.BuildACL(aclName, direction, priority, match, action, types.OvnACLLoggingMeter, getACLLoggingSeverity(aclLogging), aclLogging != "", externalIds, options) return acl } diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 869f444c46..89ba486b50 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -171,6 +171,25 @@ func (manager *LogicalSwitchManager) GetSwitchSubnets(nodeName string) []*net.IP return nil } +// AllocateUntilFull used for unit testing only, allocates the rest of the node subnet +func (manager *LogicalSwitchManager) AllocateUntilFull(nodeName string) error { + manager.RLock() + defer manager.RUnlock() + lsi, ok := manager.cache[nodeName] + if !ok { + 
return fmt.Errorf("unable to allocate ips, node: %s does not exist in logical switch manager", nodeName) + } else if len(lsi.ipams) == 0 { + return fmt.Errorf("unable to allocate ips for node: %s. logical switch manager has no IPAM", nodeName) + } + var err error + for err != ipam.ErrFull { + for _, ipam := range lsi.ipams { + _, err = ipam.AllocateNext() + } + } + return nil +} + // AllocateIPs will block off IPs in the ipnets slice as already allocated // for a given switch func (manager *LogicalSwitchManager) AllocateIPs(nodeName string, ipnets []*net.IPNet) error { @@ -198,8 +217,8 @@ func (manager *LogicalSwitchManager) AllocateIPs(nodeName string, ipnets []*net. for relIdx, relIPNet := range allocated { if relErr := lsi.ipams[relIdx].Release(relIPNet.IP); relErr != nil { klog.Errorf("Error while releasing IP: %s, err: %v", relIPNet.IP, relErr) - } else { - klog.Warningf("Reserved IP: %s were released", relIPNet.IP.String()) + } else if relIPNet.IP != nil { + klog.Warningf("Reserved IP: %s was released", relIPNet.IP.String()) } } } @@ -254,9 +273,10 @@ func (manager *LogicalSwitchManager) AllocateNextIPs(nodeName string) ([]*net.IP for relIdx, relIPNet := range ipnets { if relErr := lsi.ipams[relIdx].Release(relIPNet.IP); relErr != nil { klog.Errorf("Error while releasing IP: %s, err: %v", relIPNet.IP, relErr) + } else if relIPNet.IP != nil { + klog.Warningf("Reserved IP: %s was released", relIPNet.IP.String()) } } - klog.Warningf("Allocated IPs: %s were released", util.JoinIPNetIPs(ipnets, " ")) } }() diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 435a6287cb..0da1709b6c 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -328,7 +328,7 @@ func (oc *Controller) StartClusterMaster() error { // mention that field in OnModelUpdates or ModelPredicate. 
opModels := []libovsdbops.OperationModel{ { - Name: loadBalancerGroup.Name, + Name: &loadBalancerGroup.Name, Model: &loadBalancerGroup, }, } @@ -381,7 +381,7 @@ func (oc *Controller) SetupMaster(existingNodeNames []string) error { } opModels := []libovsdbops.OperationModel{ { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, }, @@ -444,7 +444,7 @@ func (oc *Controller) SetupMaster(existingNodeNames []string) error { } opModels = []libovsdbops.OperationModel{ { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == types.OVNJoinSwitch }, }, @@ -494,7 +494,7 @@ func (oc *Controller) SetupMaster(existingNodeNames []string) error { }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -524,7 +524,7 @@ func (oc *Controller) SetupMaster(existingNodeNames []string) error { }, }, { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == types.OVNJoinSwitch }, OnModelMutations: []interface{}{ @@ -559,7 +559,7 @@ func (oc *Controller) addNodeLogicalSwitchPort(logicalSwitchName, portName, port }, }, { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == logicalSwitchName }, OnModelMutations: []interface{}{ @@ -627,7 +627,7 @@ func (oc *Controller) syncNodeManagementPort(node *kapi.Node, hostSubnets []*net }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -757,7 +757,7 @@ func (oc *Controller) syncNodeClusterRouterPort(node *kapi.Node, hostSubnets []* }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -782,7 +782,7 @@ func (oc *Controller) syncNodeClusterRouterPort(node *kapi.Node, hostSubnets []* // Set the gateway chassis of the LRP. 
(Use "Update" so that the old value, if any, would be replaced) opModels = []libovsdbops.OperationModel{ { - Name: gatewayChassis.Name, + Name: &gatewayChassis.Name, Model: &gatewayChassis, OnModelUpdates: []interface{}{ &gatewayChassis.ChassisName, @@ -879,7 +879,7 @@ func (oc *Controller) ensureNodeLogicalNetwork(node *kapi.Node, hostSubnets []*n }, }, { - Name: logicalRouter.Name, + Name: &logicalRouter.Name, Model: &logicalRouter, ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == types.OVNClusterRouter }, OnModelMutations: []interface{}{ @@ -888,7 +888,7 @@ func (oc *Controller) ensureNodeLogicalNetwork(node *kapi.Node, hostSubnets []*n ErrNotFound: true, }, { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == nodeName }, OnModelUpdates: []interface{}{ @@ -951,7 +951,7 @@ func (oc *Controller) ensureNodeLogicalNetwork(node *kapi.Node, hostSubnets []*n // Create the Node's Logical Switch and set it's subnet opModels = []libovsdbops.OperationModel{ { - Name: logicalSwitch.Name, + Name: &logicalSwitch.Name, Model: &logicalSwitch, OnModelMutations: []interface{}{ &logicalSwitch.OtherConfig, diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 0b6fda1d03..f7bdd498dd 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -121,7 +121,7 @@ func (oc *Controller) addPodToNamespace(ns string, ips []*net.IPNet) (*gatewayIn return oc.getRoutingExternalGWs(nsInfo), oc.getRoutingPodGWs(nsInfo), nsInfo.hybridOverlayExternalGW, ops, nil } -func (oc *Controller) deletePodFromNamespace(ns string, portInfo *lpInfo) ([]ovsdb.Operation, error) { +func (oc *Controller) deletePodFromNamespace(ns string, podIfAddrs []*net.IPNet, portUUID string) ([]ovsdb.Operation, error) { nsInfo, nsUnlock := oc.getNamespaceLocked(ns, true) if nsInfo == nil { return nil, nil @@ -130,14 +130,14 @@ func (oc *Controller) deletePodFromNamespace(ns string, portInfo *lpInfo) ([]ovs var ops []ovsdb.Operation var err error if nsInfo.addressSet != nil { - if ops, err = nsInfo.addressSet.DeleteIPsReturnOps(createIPAddressSlice(portInfo.ips)); err != nil { + if ops, err = nsInfo.addressSet.DeleteIPsReturnOps(createIPAddressSlice(podIfAddrs)); err != nil { return nil, err } } // Remove the port from the multicast allow policy. 
- if oc.multicastSupport && nsInfo.multicastEnabled { - if err = podDeleteAllowMulticastPolicy(oc.nbClient, ns, portInfo); err != nil { + if oc.multicastSupport && nsInfo.multicastEnabled && len(portUUID) > 0 { + if err = podDeleteAllowMulticastPolicy(oc.nbClient, ns, portUUID); err != nil { return nil, err } } @@ -312,20 +312,30 @@ func (oc *Controller) updateNamespace(old, newer *kapi.Namespace) { } for _, pod := range existingPods { logicalPort := util.GetLogicalPortName(pod.Namespace, pod.Name) - portInfo, err := oc.logicalPortCache.get(logicalPort) + if !util.PodWantsNetwork(pod) { + continue + } + podIPs, err := util.GetAllPodIPs(pod) if err != nil { - klog.Warningf("Unable to get port %s in cache for SNAT rule removal", logicalPort) - } else { + klog.Warningf("Unable to get pod %q IPs for SNAT rule removal", logicalPort) + } + ips := make([]*net.IPNet, 0, len(podIPs)) + for _, podIP := range podIPs { + ips = append(ips, &net.IPNet{IP: podIP}) + } + if len(ips) > 0 { if extIPs, err := getExternalIPsGRSNAT(oc.watchFactory, pod.Spec.NodeName); err != nil { klog.Error(err.Error()) - } else if err = deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, extIPs, portInfo.ips); err != nil { + } else if err = deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, extIPs, ips); err != nil { klog.Error(err.Error()) } } } } } else { - oc.deleteGWRoutesForNamespace(old.Name, nil) + if err := oc.deleteGWRoutesForNamespace(old.Name, nil); err != nil { + klog.Error(err.Error()) + } nsInfo.routingExternalGWs = gatewayInfo{} } exGateways, err := parseRoutingExternalGWAnnotation(gwAnnotation) @@ -421,7 +431,9 @@ func (oc *Controller) deleteNamespace(ns *kapi.Namespace) { delete(nsInfo.networkPolicies, np.name) } } - oc.deleteGWRoutesForNamespace(ns.Name, nil) + if err := oc.deleteGWRoutesForNamespace(ns.Name, nil); err != nil { + klog.Errorf("Failed to delete GW routes for namespace: %s, error: %v", ns.Name, err) + } oc.multicastDeleteNamespace(ns, nsInfo) } diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index 1955f9d190..d7d19bbdbf 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -5,7 +5,6 @@ import ( "context" "encoding/json" "fmt" - "math/rand" "net" "reflect" "sync" @@ -36,12 +35,10 @@ import ( kapi "k8s.io/api/core/v1" kapisnetworking "k8s.io/api/networking/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" - "k8s.io/apimachinery/pkg/types" ktypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/informers" @@ -194,7 +191,8 @@ type Controller struct { v6HostSubnetsUsed float64 // Map of pods that need to be retried, and the timestamp of when they last failed - retryPods map[types.UID]*retryEntry + // The key is a string which holds "namespace_podName" + retryPods map[string]*retryEntry retryPodsLock sync.Mutex // channel to indicate we need to retry pods immediately @@ -217,6 +215,11 @@ type retryEntry struct { backoffSec time.Duration // whether to include this pod in retry iterations ignore bool + // used to indicate if add events need to be retried + needsAdd bool + // used to indicate if delete event needs to be retried; + // this will hold a copy of its value from the oc.logicalSwitchPort cache + needsDel *lpInfo } type retryNetPolEntry struct { @@ -301,7 +304,7 @@ func NewOvnController(ovnClient *util.OVNClientset, wf *factory.WatchFactory, st loadBalancerGroupUUID: 
"", aclLoggingEnabled: true, joinSwIPManager: nil, - retryPods: make(map[types.UID]*retryEntry), + retryPods: make(map[string]*retryEntry), retryPodsChan: make(chan struct{}, 1), retryNetPolices: make(map[string]*retryNetPolEntry), retryPolicyChan: make(chan struct{}, 1), @@ -455,112 +458,6 @@ func (oc *Controller) recordPodEvent(addErr error, pod *kapi.Pod) { } } -// iterateRetryPods checks if any outstanding pods have been waiting for 60 seconds of last known failure -// then tries to re-add them if so -// updateAll forces all pods to be attempted to be retried regardless of the 1 minute delay -func (oc *Controller) iterateRetryPods(updateAll bool) { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - now := time.Now() - for uid, podEntry := range oc.retryPods { - if podEntry.ignore { - continue - } - - pod := podEntry.pod - podDesc := fmt.Sprintf("[%s/%s/%s]", pod.UID, pod.Namespace, pod.Name) - // it could be that the Pod got deleted, but Pod's DeleteFunc has not been called yet, so don't retry - kPod, err := oc.watchFactory.GetPod(pod.Namespace, pod.Name) - if err != nil && errors.IsNotFound(err) { - klog.Infof("%s pod not found in the informers cache, not going to retry pod setup", podDesc) - delete(oc.retryPods, uid) - continue - } - - if !util.PodScheduled(kPod) { - klog.V(5).Infof("Retry: %s not scheduled", podDesc) - continue - } - podEntry.backoffSec = (podEntry.backoffSec * 2) - if podEntry.backoffSec > 60 { - podEntry.backoffSec = 60 - } - backoff := (podEntry.backoffSec * time.Second) + (time.Duration(rand.Intn(500)) * time.Millisecond) - podTimer := podEntry.timeStamp.Add(backoff) - if updateAll || now.After(podTimer) { - klog.Infof("%s retry pod setup", podDesc) - - if oc.ensurePod(nil, kPod, true) { - klog.Infof("%s pod setup successful", podDesc) - delete(oc.retryPods, uid) - } else { - klog.Infof("%s setup retry failed; will try again later", podDesc) - oc.retryPods[uid] = &retryEntry{pod, time.Now(), podEntry.backoffSec, false} - } - } else { - klog.V(5).Infof("%s retry pod not after timer yet, time: %s", podDesc, podTimer) - } - } -} - -// checkAndDeleteRetryPod deletes a specific entry from the map, if it existed, returns true -func (oc *Controller) checkAndDeleteRetryPod(uid types.UID) bool { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - if _, ok := oc.retryPods[uid]; ok { - delete(oc.retryPods, uid) - return true - } - return false -} - -// checkAndSkipRetryPod sets a specific entry from the map to be ignored for subsequent retries -// if it existed, returns true -func (oc *Controller) checkAndSkipRetryPod(uid types.UID) bool { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - if entry, ok := oc.retryPods[uid]; ok { - entry.ignore = true - return true - } - return false -} - -// unSkipRetryPod ensures a pod is no longer ignored for retry loop -func (oc *Controller) unSkipRetryPod(pod *kapi.Pod) { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - if entry, ok := oc.retryPods[pod.UID]; ok { - entry.ignore = false - } -} - -// initRetryPod tracks a failed pod to potentially retry later -// initially it is marked as skipped for retry loop (ignore = true) -func (oc *Controller) initRetryPod(pod *kapi.Pod) { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - if entry, ok := oc.retryPods[pod.UID]; ok { - entry.timeStamp = time.Now() - } else { - oc.retryPods[pod.UID] = &retryEntry{pod, time.Now(), 1, true} - } -} - -// addRetryPods adds multiple pods to retry later -func (oc *Controller) addRetryPods(pods 
[]kapi.Pod) { - oc.retryPodsLock.Lock() - defer oc.retryPodsLock.Unlock() - for _, pod := range pods { - pod := pod - if entry, ok := oc.retryPods[pod.UID]; ok { - entry.timeStamp = time.Now() - } else { - oc.retryPods[pod.UID] = &retryEntry{&pod, time.Now(), 1, false} - } - } -} - func exGatewayAnnotationsChanged(oldPod, newPod *kapi.Pod) bool { return oldPod.Annotations[routingNamespaceAnnotation] != newPod.Annotations[routingNamespaceAnnotation] || oldPod.Annotations[routingNetworkAnnotation] != newPod.Annotations[routingNetworkAnnotation] || @@ -571,48 +468,54 @@ func networkStatusAnnotationsChanged(oldPod, newPod *kapi.Pod) bool { return oldPod.Annotations[nettypes.NetworkStatusAnnot] != newPod.Annotations[nettypes.NetworkStatusAnnot] } -// ensurePod tries to set up a pod. It returns success or failure; failure -// indicates the pod should be retried later. -func (oc *Controller) ensurePod(oldPod, pod *kapi.Pod, addPort bool) bool { +// ensurePod tries to set up a pod. It returns nil on success and error on failure; failure +// indicates the pod set up should be retried later. +func (oc *Controller) ensurePod(oldPod, pod *kapi.Pod, addPort bool) error { // Try unscheduled pods later if !util.PodScheduled(pod) { - return false + return fmt.Errorf("failed to ensurePod %s/%s since it is not yet scheduled", pod.Namespace, pod.Name) } if oldPod != nil && (exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod)) { // No matter if a pod is ovn networked, or host networked, we still need to check for exgw // annotations. If the pod is ovn networked and is in update reschedule, addLogicalPort will take // care of updating the exgw updates - oc.deletePodExternalGW(oldPod) + if err := oc.deletePodExternalGW(oldPod); err != nil { + return fmt.Errorf("ensurePod failed %s/%s: %w", pod.Namespace, pod.Name, err) + } } if util.PodWantsNetwork(pod) && addPort { if err := oc.addLogicalPort(pod); err != nil { - klog.Errorf(err.Error()) - oc.recordPodEvent(err, pod) - return false + return fmt.Errorf("addLogicalPort failed for %s/%s: %w", pod.Namespace, pod.Name, err) } } else { // either pod is host-networked or its an update for a normal pod (addPort=false case) if oldPod == nil || exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod) { if err := oc.addPodExternalGW(pod); err != nil { - klog.Errorf(err.Error()) - oc.recordPodEvent(err, pod) - return false + return fmt.Errorf("addPodExternalGW failed for %s/%s: %w", pod.Namespace, pod.Name, err) } } } - return true + return nil } -func (oc *Controller) requestRetryPods() { - select { - case oc.retryPodsChan <- struct{}{}: - klog.V(5).Infof("Iterate retry pods requested") - default: - klog.V(5).Infof("Iterate retry pods already requested") +// removePod tried to tear down a pod. It returns nil on success and error on failure; +// failure indicates the pod tear down should be retried later. 
+func (oc *Controller) removePod(pod *kapi.Pod, portInfo *lpInfo) error { + if !util.PodWantsNetwork(pod) { + if err := oc.deletePodExternalGW(pod); err != nil { + return fmt.Errorf("unable to delete external gateway routes for pod %s: %w", + getPodNamespacedName(pod), err) + } + return nil } + if err := oc.deleteLogicalPort(pod, portInfo); err != nil { + return fmt.Errorf("deleteLogicalPort failed for pod %s: %w", + getPodNamespacedName(pod), err) + } + return nil } // WatchPods starts the watching of Pod resource and calls back the appropriate handler logic @@ -637,12 +540,53 @@ func (oc *Controller) WatchPods() { AddFunc: func(obj interface{}) { pod := obj.(*kapi.Pod) oc.metricsRecorder.AddPod(pod.UID) - oc.initRetryPod(pod) - if !oc.ensurePod(nil, pod, true) { + oc.checkAndSkipRetryPod(pod) + // in case ovnkube-master is restarted and gets all the add events with completed pods + if util.PodCompleted(pod) { + // pod is in completed state, remove it + klog.Infof("Detected completed pod: %s. Will remove.", getPodNamespacedName(pod)) + oc.initRetryDelPod(pod) + oc.removeAddRetry(pod) + oc.logicalPortCache.remove(util.GetLogicalPortName(pod.Namespace, pod.Name)) + retryEntry := oc.getPodRetryEntry(pod) + var portInfo *lpInfo + if retryEntry != nil { + // retryEntry shouldn't be nil since we usually add the pod to retryCache above + portInfo = retryEntry.needsDel + } + if err := oc.removePod(pod, portInfo); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to delete completed pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.unSkipRetryPod(pod) + return + } + oc.checkAndDeleteRetryPod(pod) + return + } + // need to add new pod + oc.initRetryAddPod(pod) + if retryEntry := oc.getPodRetryEntry(pod); retryEntry != nil && retryEntry.needsDel != nil { + klog.Infof("Detected leftover old pod during new pod add with the same name: %s. "+ + "Attempting deletion of leftover old...", getPodNamespacedName(pod)) + if err := oc.removePod(pod, retryEntry.needsDel); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to delete pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.unSkipRetryPod(pod) + return + } + // deletion was a success; remove delete retry entry + oc.removeDeleteRetry(pod) + } + if err := oc.ensurePod(nil, pod, true); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to add pod %s, error: %v", + getPodNamespacedName(pod), err) oc.unSkipRetryPod(pod) return } - oc.checkAndDeleteRetryPod(pod.UID) + oc.checkAndDeleteRetryPod(pod) }, UpdateFunc: func(old, newer interface{}) { oldPod := old.(*kapi.Pod) @@ -662,23 +606,75 @@ func (oc *Controller) WatchPods() { podNs, podName) return } - if !oc.ensurePod(oldPod, pod, oc.checkAndSkipRetryPod(pod.UID)) { + oc.checkAndSkipRetryPod(pod) + if retryEntry := oc.getPodRetryEntry(pod); retryEntry != nil && retryEntry.needsDel != nil { + klog.Infof("Detected leftover old pod during new pod add with the same name: %s. "+ + "Attempting deletion of leftover old...", getPodNamespacedName(pod)) + if err := oc.removePod(pod, retryEntry.needsDel); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to delete pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.unSkipRetryPod(pod) + return + } + // deletion was a success; remove delete retry entry + oc.removeDeleteRetry(pod) + } else if util.PodCompleted(pod) { + // pod is in completed state, remove it + klog.Infof("Detected completed pod: %s. 
Will remove.", getPodNamespacedName(pod)) + oc.initRetryDelPod(pod) + oc.removeAddRetry(pod) + oc.logicalPortCache.remove(util.GetLogicalPortName(pod.Namespace, pod.Name)) + retryEntry := oc.getPodRetryEntry(pod) + var portInfo *lpInfo + if retryEntry != nil { + // retryEntry shouldn't be nil since we usually add the pod to retryCache above + portInfo = retryEntry.needsDel + } + if err := oc.removePod(pod, portInfo); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to delete completed pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.unSkipRetryPod(pod) + return + } + oc.checkAndDeleteRetryPod(pod) + return + } + + if err := oc.ensurePod(oldPod, pod, oc.checkAndSkipRetryPod(pod)); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to update pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.initRetryAddPod(pod) // unskip failed pod for next retry iteration oc.unSkipRetryPod(pod) return } - oc.checkAndDeleteRetryPod(pod.UID) + oc.checkAndDeleteRetryPod(pod) }, DeleteFunc: func(obj interface{}) { pod := obj.(*kapi.Pod) oc.metricsRecorder.CleanPod(pod.UID) - oc.checkAndDeleteRetryPod(pod.UID) - if !util.PodWantsNetwork(pod) { - oc.deletePodExternalGW(pod) + oc.initRetryDelPod(pod) + // we have a copy of portInfo in the retry cache now, we can remove it from + // logicalPortCache so that we don't race with a new add pod that comes with + // the same name. + oc.logicalPortCache.remove(util.GetLogicalPortName(pod.Namespace, pod.Name)) + retryEntry := oc.getPodRetryEntry(pod) + var portInfo *lpInfo + if retryEntry != nil { + // retryEntry shouldn't be nil since we usually add the pod to retryCache above + portInfo = retryEntry.needsDel + } + if err := oc.removePod(pod, portInfo); err != nil { + oc.recordPodEvent(err, pod) + klog.Errorf("Failed to delete pod %s, error: %v", + getPodNamespacedName(pod), err) + oc.unSkipRetryPod(pod) return } - // deleteLogicalPort will take care of removing exgw for ovn networked pods - oc.deleteLogicalPort(pod) + oc.checkAndDeleteRetryPod(pod) }, }, oc.syncPods) klog.Infof("Bootstrapping existing pods and cleaning stale pods took %v", time.Since(start)) @@ -1028,8 +1024,15 @@ func (oc *Controller) WatchEgressIPPods() { UpdateFunc: func(oldObj, newObj interface{}) { oldPod := oldObj.(*kapi.Pod) newPod := newObj.(*kapi.Pod) - if err := oc.reconcileEgressIPPod(oldPod, newPod); err != nil { - klog.Errorf("Unable to update egress IP matching pod: %s/%s, err: %v", newPod.Name, newPod.Namespace, err) + if util.PodCompleted(newPod) { + if err := oc.reconcileEgressIPPod(oldPod, nil); err != nil { + klog.Errorf("Unable to delete egress IP matching completed pod: %s/%s, err: %v", + oldPod.Name, oldPod.Namespace, err) + } + } else { + if err := oc.reconcileEgressIPPod(oldPod, newPod); err != nil { + klog.Errorf("Unable to update egress IP matching pod: %s/%s, err: %v", newPod.Name, newPod.Namespace, err) + } } }, DeleteFunc: func(obj interface{}) { diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index 4954d60345..16557c36d4 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -42,6 +42,10 @@ func (oc *Controller) syncPodsRetriable(pods []interface{}) error { } annotations, err := util.UnmarshalPodAnnotation(pod.Annotations) if util.PodScheduled(pod) && util.PodWantsNetwork(pod) && err == nil { + // skip nodes that are not running ovnk (inferred from host subnets) + if oc.lsManager.IsNonHostSubnetSwitch(pod.Spec.NodeName) { + continue + } logicalPort := 
util.GetLogicalPortName(pod.Namespace, pod.Name) expectedLogicalPorts[logicalPort] = true if err = oc.waitForNodeLogicalSwitchInCache(pod.Spec.NodeName); err != nil { @@ -81,6 +85,10 @@ func (oc *Controller) syncPodsRetriable(pods []interface{}) error { return fmt.Errorf("failed to get nodes: %v", err) } for _, n := range nodes { + // skip nodes that are not running ovnk (inferred from host subnets) + if oc.lsManager.IsNonHostSubnetSwitch(n.Name) { + continue + } stalePorts := []string{} // find the logical switch for the node ls := &nbdb.LogicalSwitch{} @@ -125,77 +133,70 @@ func (oc *Controller) syncPodsRetriable(pods []interface{}) error { return nil } -func (oc *Controller) deleteLogicalPort(pod *kapi.Pod) { - oc.deletePodExternalGW(pod) +func (oc *Controller) deleteLogicalPort(pod *kapi.Pod, portInfo *lpInfo) (err error) { + podDesc := pod.Namespace + "/" + pod.Name + klog.Infof("Deleting pod: %s", podDesc) + + if err = oc.deletePodExternalGW(pod); err != nil { + return fmt.Errorf("unable to delete external gateway routes for pod %s: %w", podDesc, err) + } if pod.Spec.HostNetwork { - return + return nil } if !util.PodScheduled(pod) { - return + return nil } - podDesc := pod.Namespace + "/" + pod.Name - klog.Infof("Deleting pod: %s", podDesc) - logicalPort := util.GetLogicalPortName(pod.Namespace, pod.Name) - portInfo, err := oc.logicalPortCache.get(logicalPort) - if err != nil { - klog.Errorf(err.Error()) + portUUID := "" + var podIfAddrs []*net.IPNet + if portInfo == nil { // If ovnkube-master restarts, it is also possible the Pod's logical switch port // is not re-added into the cache. Delete logical switch port anyway. - ops, err := oc.ovnNBLSPDel(logicalPort, pod.Spec.NodeName, "") - if err != nil { - klog.Errorf(err.Error()) - } else { - _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) - if err != nil { - klog.Errorf("Cannot delete logical switch port %s, %v", logicalPort, err) - } - } - - // Even if the port is not in the cache, IPs annotated in the Pod annotation may already be allocated, - // need to release them to avoid leakage. annotation, err := util.UnmarshalPodAnnotation(pod.Annotations) - if err == nil { - podIfAddrs := annotation.IPs - _ = oc.lsManager.ReleaseIPs(pod.Spec.NodeName, podIfAddrs) + if err != nil { + return fmt.Errorf("unable to unmarshal pod annocations for pod %s/%s: %w", pod.Namespace, pod.Name, err) } - return + podIfAddrs = annotation.IPs + } else { + portUUID = portInfo.uuid + podIfAddrs = portInfo.ips } - // FIXME: if any of these steps fails we need to stop and try again later... var allOps, ops []ovsdb.Operation - if ops, err = oc.deletePodFromNamespace(pod.Namespace, portInfo); err != nil { - klog.Errorf(err.Error()) - } else { - allOps = append(allOps, ops...) + if ops, err = oc.deletePodFromNamespace(pod.Namespace, podIfAddrs, portUUID); err != nil { + return fmt.Errorf("unable to delete pod %s from namespace: %w", podDesc, err) } + allOps = append(allOps, ops...) - ops, err = oc.ovnNBLSPDel(logicalPort, pod.Spec.NodeName, portInfo.uuid) + ops, err = oc.delLSPOps(logicalPort, pod.Spec.NodeName, portUUID) if err != nil { - klog.Errorf(err.Error()) - } else { - allOps = append(allOps, ops...) + return fmt.Errorf("failed to create delete ops for the lsp: %s: %s", logicalPort, err) } + allOps = append(allOps, ops...) 
_, err = libovsdbops.TransactAndCheck(oc.nbClient, allOps) if err != nil { - klog.Errorf("Cannot delete logical switch port %s, %v", logicalPort, err) + return fmt.Errorf("cannot delete logical switch port %s, %v", logicalPort, err) } - if err := oc.lsManager.ReleaseIPs(portInfo.logicalSwitch, portInfo.ips); err != nil { - klog.Errorf(err.Error()) + klog.Infof("Attempting to release IPs for pod: %s/%s, ips: %s", pod.Namespace, pod.Name, + util.JoinIPNetIPs(podIfAddrs, " ")) + if err := oc.lsManager.ReleaseIPs(pod.Spec.NodeName, podIfAddrs); err != nil { + return fmt.Errorf("cannot release IPs for pod %s: %w", podDesc, err) } if config.Gateway.DisableSNATMultipleGWs { - if err := deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, []*net.IPNet{}, portInfo.ips); err != nil { - klog.Errorf(err.Error()) + if err := deletePerPodGRSNAT(oc.nbClient, pod.Spec.NodeName, []*net.IPNet{}, podIfAddrs); err != nil { + return fmt.Errorf("cannot delete GR SNAT for pod %s: %w", podDesc, err) } } podNsName := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} - oc.deleteGWRoutesForPod(podNsName, portInfo.ips) + if err := oc.deleteGWRoutesForPod(podNsName, podIfAddrs); err != nil { + return fmt.Errorf("cannot delete GW Routes for pod %s: %w", podDesc, err) + } - oc.logicalPortCache.remove(logicalPort) + return nil } func (oc *Controller) waitForNodeLogicalSwitch(nodeName string) (*nbdb.LogicalSwitch, error) { @@ -572,15 +573,16 @@ func (oc *Controller) addLogicalPort(pod *kapi.Pod) (err error) { if !lspExist { timeout := ovntypes.OVSDBWaitTimeout - allOps = append(allOps, ovsdb.Operation{ - Op: ovsdb.OperationWait, - Timeout: &timeout, - Table: "Logical_Switch_Port", - Where: []ovsdb.Condition{{Column: "name", Function: ovsdb.ConditionEqual, Value: lsp.Name}}, - Columns: []string{"name"}, - Until: "!=", - Rows: []ovsdb.Row{{"name": lsp.Name}}, - }) + condition := model.Condition{ + Field: &lsp.Name, + Function: ovsdb.ConditionEqual, + Value: lsp.Name, + } + waitOps, err := oc.nbClient.Where(lsp, condition).Wait(ovsdb.WaitConditionNotEqual, &timeout, lsp, &lsp.Name) + if err != nil { + return err + } + allOps = append(allOps, waitOps...) 
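Editor's note: the hunk above replaces a hand-built ovsdb wait operation with the client's Where(...).Wait(...) helper, so the transaction asserts that no Logical_Switch_Port row with this name exists before the create runs. A toy, self-contained sketch of that transactional intent follows; it emulates the semantics only and is not the libovsdb client API.

package main

import (
	"errors"
	"fmt"
)

// fakeNB stands in for the NB database; ports maps LSP name -> exists.
type fakeNB struct {
	ports map[string]bool
}

// createIfAbsent mimics a transaction that begins with a wait op ("no row
// with this name may exist") followed by the create: if the name already
// exists, the whole transaction fails instead of inserting a duplicate.
func (db *fakeNB) createIfAbsent(name string) error {
	if db.ports[name] {
		return errors.New("wait condition failed: " + name + " already exists")
	}
	db.ports[name] = true
	return nil
}

func main() {
	db := &fakeNB{ports: map[string]bool{}}
	fmt.Println(db.createIfAbsent("namespace1_myPod")) // <nil>
	fmt.Println(db.createIfAbsent("namespace1_myPod")) // wait condition failed
}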
// create new logical switch port ops, err := oc.nbClient.Create(lsp) @@ -692,15 +694,9 @@ func (oc *Controller) getPortAddresses(nodeName, portName string) (net.HardwareA return podMac, podIPNets, nil } -// ovnNBLSPDel deletes the given logical switch using the libovsdb library -func (oc *Controller) ovnNBLSPDel(logicalPort, logicalSwitch, lspUUID string) ([]ovsdb.Operation, error) { +// delLSPOps returns the ovsdb operations required to delete the given logical switch port (LSP) +func (oc *Controller) delLSPOps(logicalPort, logicalSwitch, lspUUID string) ([]ovsdb.Operation, error) { var allOps []ovsdb.Operation - ls := &nbdb.LogicalSwitch{} - if lsUUID, ok := oc.lsManager.GetUUID(logicalSwitch); !ok { - return nil, fmt.Errorf("error getting logical switch for node %s: switch not in logical switch cache", logicalSwitch) - } else { - ls.UUID = lsUUID - } lsp := &nbdb.LogicalSwitchPort{ UUID: lspUUID, @@ -709,9 +705,24 @@ func (oc *Controller) ovnNBLSPDel(logicalPort, logicalSwitch, lspUUID string) ([ if lspUUID == "" { ctx, cancel := context.WithTimeout(context.Background(), ovntypes.OVSDBTimeout) defer cancel() - if err := oc.nbClient.Get(ctx, lsp); err != nil { + if err := oc.nbClient.Get(ctx, lsp); err != nil && err != libovsdbclient.ErrNotFound { return nil, fmt.Errorf("cannot delete logical switch port %s failed retrieving the object %v", logicalPort, err) + } else if err == libovsdbclient.ErrNotFound { + // lsp doesn't exist; nothing to do + return allOps, nil + } + } + + ls := &nbdb.LogicalSwitch{} + var err error + if lsUUID, ok := oc.lsManager.GetUUID(logicalSwitch); !ok { + klog.Errorf("Error getting logical switch for node %s: switch not in logical switch cache", logicalSwitch) + // Not in cache: Try getting the logical switch from ovn database (slower method) + if ls, err = libovsdbops.FindSwitchByName(oc.nbClient, logicalSwitch); err != nil { + return nil, fmt.Errorf("can't find switch for node %s: %v", logicalSwitch, err) } + } else { + ls.UUID = lsUUID } ops, err := oc.nbClient.Where(ls).Mutate(ls, model.Mutation{ diff --git a/go-controller/pkg/ovn/pods_retry.go b/go-controller/pkg/ovn/pods_retry.go new file mode 100644 index 0000000000..4bd2ec1e95 --- /dev/null +++ b/go-controller/pkg/ovn/pods_retry.go @@ -0,0 +1,228 @@ +package ovn + +import ( + "fmt" + "math/rand" + "net" + "time" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + kapi "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/klog/v2" +) + +// iterateRetryPods checks if any outstanding pods have been waiting for 60 seconds of last known failure +// then tries to re-add them if so +// updateAll forces all pods to be attempted to be retried regardless of the 1 minute delay +func (oc *Controller) iterateRetryPods(updateAll bool) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + now := time.Now() + for key, podEntry := range oc.retryPods { + if podEntry.ignore { + // neither addition nor deletion is being retried + continue + } + + pod := podEntry.pod + podDesc := fmt.Sprintf("[%s/%s/%s]", pod.UID, pod.Namespace, pod.Name) + // it could be that the Pod got deleted, but Pod's DeleteFunc has not been called yet, so don't retry creation + if podEntry.needsAdd { + kPod, err := oc.watchFactory.GetPod(pod.Namespace, pod.Name) + if err != nil && errors.IsNotFound(err) { + klog.Infof("%s pod not found in the informers cache, not going to retry pod setup", podDesc) + podEntry.needsAdd = false + } else { + pod = kPod + } + } + + if !util.PodScheduled(pod) { + 
klog.V(5).Infof("Retry: %s not scheduled", podDesc) + continue + } + + podEntry.backoffSec = (podEntry.backoffSec * 2) + if podEntry.backoffSec > 60 { + podEntry.backoffSec = 60 + } + backoff := (podEntry.backoffSec * time.Second) + (time.Duration(rand.Intn(500)) * time.Millisecond) + podTimer := podEntry.timeStamp.Add(backoff) + if updateAll || now.After(podTimer) { + // check if we need to retry delete first + if podEntry.needsDel != nil { + klog.Infof("%s retry pod teardown", podDesc) + if err := oc.removePod(pod, podEntry.needsDel); err != nil { + klog.Infof("%s teardown retry failed; will try again later", podDesc) + podEntry.timeStamp = time.Now() + continue // if deletion failed we will not retry add + } + klog.Infof("%s pod teardown successful", podDesc) + podEntry.needsDel = nil + if !podEntry.needsAdd { + delete(oc.retryPods, key) // this means retryDelete worked, we can remove entry safely + } + } + // check if we need to retry add + if podEntry.needsAdd { + klog.Infof("%s retry pod setup", podDesc) + if err := oc.ensurePod(nil, pod, true); err != nil { + klog.Infof("%s setup retry failed; will try again later", podDesc) + podEntry.timeStamp = time.Now() + } else { + klog.Infof("%s pod setup successful", podDesc) + delete(oc.retryPods, key) // this means retryDelete and retryAdd both worked, we can remove entry safely + } + } + } else { + klog.V(5).Infof("%s retry pod not after timer yet, time: %s", podDesc, podTimer) + } + } +} + +// checkAndDeleteRetryPod deletes a specific entry from the map, if it existed, returns true +func (oc *Controller) checkAndDeleteRetryPod(pod *kapi.Pod) bool { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if _, ok := oc.retryPods[key]; ok { + delete(oc.retryPods, key) + return true + } + return false +} + +// checkAndSkipRetryPod sets a specific entry from the map to be ignored for subsequent retries +// if it existed, returns true +func (oc *Controller) checkAndSkipRetryPod(pod *kapi.Pod) bool { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entry.ignore = true + return true + } + return false +} + +// unSkipRetryPod ensures a pod is no longer ignored for retry loop +func (oc *Controller) unSkipRetryPod(pod *kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entry.ignore = false + } +} + +// initRetryAddPod tracks a pod that failed to be created to potentially retry later (needsAdd = true) +// initially it is marked as skipped for retry loop (ignore = true) +func (oc *Controller) initRetryAddPod(pod *kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entry.timeStamp = time.Now() + entry.pod = pod + entry.needsAdd = true + } else { + oc.retryPods[key] = &retryEntry{pod, time.Now(), 1, true, true, nil} + } +} + +// initRetryDelPod tracks a pod that failed to be deleted to potentially retry later (needsDel != nil) +// initially it is marked as skipped for retry loop (ignore = true) +func (oc *Controller) initRetryDelPod(pod *kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + var portInfo *lpInfo + if !util.PodWantsNetwork(pod) { + // create dummy logicalPortInfo for host-networked pods + mac, _ := net.ParseMAC("00:00:00:00:00:00") + portInfo = &lpInfo{ + 
logicalSwitch: "host-networked", + name: getPodNamespacedName(pod), + uuid: "host-networked", + ips: []*net.IPNet{}, + mac: mac, + } + } else { + portInfo, _ = oc.logicalPortCache.get(key) + } + if entry, ok := oc.retryPods[key]; ok { + entry.timeStamp = time.Now() + entry.needsDel = portInfo + } else { + oc.retryPods[key] = &retryEntry{pod, time.Now(), 1, true, false, portInfo} + } +} + +func (oc *Controller) removeAddRetry(pod *kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entry.needsAdd = false + } +} + +func (oc *Controller) removeDeleteRetry(pod *kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entry.needsDel = nil + } +} + +func (oc *Controller) getPodRetryEntry(pod *kapi.Pod) *retryEntry { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + key := getPodNamespacedName(pod) + if entry, ok := oc.retryPods[key]; ok { + entryCopy := *entry + return &entryCopy + } + return nil +} + +// addRetryPods adds multiple pods to be retried later for their add events +func (oc *Controller) addRetryPods(pods []kapi.Pod) { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + for _, pod := range pods { + pod := pod + key := getPodNamespacedName(&pod) + if entry, ok := oc.retryPods[key]; ok { + entry.timeStamp = time.Now() + entry.pod = &pod + } else { + oc.retryPods[key] = &retryEntry{&pod, time.Now(), 1, false, true, nil} + } + } +} + +func (oc *Controller) requestRetryPods() { + select { + case oc.retryPodsChan <- struct{}{}: + klog.V(5).Infof("Iterate retry pods requested") + default: + klog.V(5).Infof("Iterate retry pods already requested") + } +} + +// unit testing only +func (oc *Controller) hasPodRetryEntry(pod *kapi.Pod) bool { + oc.retryPodsLock.Lock() + defer oc.retryPodsLock.Unlock() + _, ok := oc.retryPods[getPodNamespacedName(pod)] + return ok +} + +// getPodNamespacedName returns _ for the provided pod +// this is to maintain the same key format as used by logicalPortCache +func getPodNamespacedName(pod *kapi.Pod) string { + return fmt.Sprintf("%s_%s", pod.Namespace, pod.Name) +} diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index 4a989698aa..e76e7c8b68 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -364,6 +364,107 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("allows allocation after pods are completed", func() { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace("namespace1") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup(initialDB, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{}, + }, + ) + + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.WatchNamespaces() + fakeOvn.controller.WatchPods() + + pod, _ := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Get(context.TODO(), t.podName, metav1.GetOptions{}) + gomega.Expect(pod).To(gomega.BeNil()) + + myPod := newPod(t.namespace, t.podName, t.nodeName, t.podIP) + _, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), + myPod, 
metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(func() string { + return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) + }, 2).Should(gomega.MatchJSON(t.getAnnotationsJson())) + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(getExpectedDataPodsAndSwitches([]testPod{t}, []string{"node1"}))) + + ginkgo.By("Allocating all of the rest of the node subnet") + // allocate all the rest of the IPs in the subnet + fakeOvn.controller.lsManager.AllocateUntilFull("node1") + + ginkgo.By("Creating another pod which will fail due to allocation full") + t2 := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod2", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + myPod2, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), + newPod(t2.namespace, t2.podName, t2.nodeName, t2.podIP), metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { + return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t2.namespace, t2.podName) + }, 2).Should(gomega.HaveLen(0)) + + gomega.Eventually(func() bool { + return fakeOvn.controller.hasPodRetryEntry(myPod2) + }).Should(gomega.BeTrue()) + + ginkgo.By("Marking myPod as completed should free IP") + myPod.Status.Phase = v1.PodSucceeded + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).UpdateStatus(context.TODO(), + myPod, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // port should be gone or marked for removal in logical port cache + logicalPort := util.GetLogicalPortName(myPod.Namespace, myPod.Name) + gomega.Eventually(func() bool { + info, err := fakeOvn.controller.logicalPortCache.get(logicalPort) + return err != nil || !info.expires.IsZero() + }, 2).Should(gomega.BeTrue()) + + // there should also be no entry for this pod in the retry cache + gomega.Eventually(func() bool { + return fakeOvn.controller.getPodRetryEntry(myPod) == nil + }, 2).Should(gomega.BeTrue()) + ginkgo.By("Freed IP should now allow mypod2 to come up") + fakeOvn.controller.requestRetryPods() + gomega.Eventually(func() string { + return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t2.namespace, t2.podName) + }, 2).Should(gomega.MatchJSON(t2.getAnnotationsJson())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("retryPod cache operations while adding a new pod", func() { app.Action = func(ctx *cli.Context) error { config.Gateway.DisableSNATMultipleGWs = true @@ -402,27 +503,168 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { ObjectMeta: metav1.ObjectMeta{ Name: t.podName, Namespace: namespaceT.Name, - UID: types.UID("123"), }, } - success := fakeOvn.controller.ensurePod(nil, podObj, true) // this fails since pod doesn't exist to set annotations - gomega.Expect(success).To(gomega.BeFalse()) + err := fakeOvn.controller.ensurePod(nil, podObj, true) // this fails since pod doesn't exist to set annotations + gomega.Expect(err).To(gomega.HaveOccurred()) gomega.Expect(len(fakeOvn.controller.retryPods)).To(gomega.Equal(0)) fakeOvn.controller.addRetryPods([]v1.Pod{*podObj}) + key := getPodNamespacedName(podObj) gomega.Expect(len(fakeOvn.controller.retryPods)).To(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.retryPods["123"]).ToNot(gomega.BeNil()) - gomega.Expect(fakeOvn.controller.retryPods["123"].ignore).To(gomega.BeFalse()) - 
gomega.Expect(fakeOvn.controller.retryPods["123"].pod.UID).To(gomega.Equal(podObj.UID)) + gomega.Expect(fakeOvn.controller.retryPods[key]).ToNot(gomega.BeNil()) + gomega.Expect(fakeOvn.controller.retryPods[key].ignore).To(gomega.BeFalse()) + gomega.Expect(fakeOvn.controller.retryPods[key].pod.UID).To(gomega.Equal(podObj.UID)) - fakeOvn.controller.checkAndSkipRetryPod(podObj.UID) - gomega.Expect(fakeOvn.controller.retryPods["123"].ignore).To(gomega.BeTrue()) + fakeOvn.controller.checkAndSkipRetryPod(podObj) + gomega.Expect(fakeOvn.controller.retryPods[key].ignore).To(gomega.BeTrue()) fakeOvn.controller.unSkipRetryPod(podObj) - gomega.Expect(fakeOvn.controller.retryPods["123"].ignore).To(gomega.BeFalse()) + gomega.Expect(fakeOvn.controller.retryPods[key].ignore).To(gomega.BeFalse()) - fakeOvn.controller.checkAndDeleteRetryPod(podObj.UID) - gomega.Expect(fakeOvn.controller.retryPods["123"]).To(gomega.BeNil()) + fakeOvn.controller.checkAndDeleteRetryPod(podObj) + gomega.Expect(fakeOvn.controller.retryPods[key]).To(gomega.BeNil()) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("correctly retries a failure while adding a pod", func() { + app.Action = func(ctx *cli.Context) error { + namespace1 := *newNamespace("namespace1") + podTest := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespace1.Name, + ) + pod := newPod(podTest.namespace, podTest.podName, podTest.nodeName, podTest.podIP) + + fakeOvn.startWithDBSetup(initialDB, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespace1, + }, + }, + &v1.PodList{ + Items: []v1.Pod{*pod}, + }, + ) + + podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.WatchNamespaces() + fakeOvn.asf.ExpectAddressSetWithIPs(podTest.namespace, []string{podTest.podIP}) + gomega.Eventually(fakeOvn.controller.nbClient).Should( + libovsdbtest.HaveData(getExpectedDataPodsAndSwitches([]testPod{}, []string{"node1"})...)) + + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + // trigger pod add which will fail with "context deadline exceeded: while awaiting reconnection" + fakeOvn.controller.WatchPods() + + // sleep long enough for TransactWithRetry to fail, causing pod delete to fail + time.Sleep(ovstypes.OVSDBTimeout + time.Second) + + // check to see if the pod retry cache has an entry for this policy + gomega.Eventually(func() *retryEntry { + return fakeOvn.controller.getPodRetryEntry(pod) + }).ShouldNot(gomega.BeNil()) + connCtx, cancel := context.WithTimeout(context.Background(), ovstypes.OVSDBTimeout) + + defer cancel() + fakeOvn.resetNBClient(connCtx) + fakeOvn.controller.requestRetryPods() // retry the failed entry + + _, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(podTest.namespace).Get(context.TODO(), podTest.podName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.asf.ExpectAddressSetWithIPs(podTest.namespace, []string{podTest.podIP}) + gomega.Eventually(fakeOvn.controller.nbClient).Should( + libovsdbtest.HaveData(getExpectedDataPodsAndSwitches([]testPod{podTest}, []string{"node1"})...)) + // check the retry cache no longer has the entry + gomega.Eventually(func() *retryEntry { + return fakeOvn.controller.getPodRetryEntry(pod) + }).Should(gomega.BeNil()) + 
return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("correctly retries a failure while deleting a pod", func() { + app.Action = func(ctx *cli.Context) error { + namespace1 := *newNamespace("namespace1") + podTest := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespace1.Name, + ) + pod := newPod(podTest.namespace, podTest.podName, podTest.nodeName, podTest.podIP) + expectedData := []libovsdbtest.TestData{getExpectedDataPodsAndSwitches([]testPod{podTest}, []string{"node1"})} + + fakeOvn.startWithDBSetup(initialDB, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespace1, + }, + }, + &v1.PodList{ + Items: []v1.Pod{*pod}, + }, + ) + + podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + fakeOvn.controller.WatchNamespaces() + fakeOvn.controller.WatchPods() + + _, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(podTest.namespace).Get(context.TODO(), podTest.podName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) + fakeOvn.asf.ExpectAddressSetWithIPs(podTest.namespace, []string{podTest.podIP}) + + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + // trigger pod delete which will fail with "context deadline exceeded: while awaiting reconnection" + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // sleep long enough for TransactWithRetry to fail, causing pod delete to fail + time.Sleep(ovstypes.OVSDBTimeout + time.Second) + + // check to see if the pod retry cache has an entry for this policy + gomega.Eventually(func() *retryEntry { + return fakeOvn.controller.getPodRetryEntry(pod) + }).ShouldNot(gomega.BeNil()) + connCtx, cancel := context.WithTimeout(context.Background(), ovstypes.OVSDBTimeout) + + defer cancel() + fakeOvn.resetNBClient(connCtx) + fakeOvn.controller.requestRetryPods() // retry the failed entry + + fakeOvn.asf.ExpectEmptyAddressSet(podTest.namespace) + gomega.Eventually(fakeOvn.controller.nbClient).Should( + libovsdbtest.HaveData(getExpectedDataPodsAndSwitches([]testPod{}, []string{"node1"})...)) + // check the retry cache no longer has the entry + gomega.Eventually(func() *retryEntry { + return fakeOvn.controller.getPodRetryEntry(pod) + }).Should(gomega.BeNil()) return nil } diff --git a/go-controller/pkg/ovn/policy.go b/go-controller/pkg/ovn/policy.go index 599be10cb2..facb2716a6 100644 --- a/go-controller/pkg/ovn/policy.go +++ b/go-controller/pkg/ovn/policy.go @@ -189,7 +189,9 @@ func (oc *Controller) syncNetworkPoliciesRetriable(networkPolicies []interface{} if err != nil { return fmt.Errorf("cannot update old Egress NetworkPolicy ACLs: %v", err) } + } + return nil } @@ -200,7 +202,7 @@ func addAllowACLFromNode(nodeName string, mgmtPortIP net.IP, nbClient libovsdbcl } match := fmt.Sprintf("%s.src==%s", ipFamily, mgmtPortIP.String()) - nodeACL := libovsdbops.BuildACL("", types.DirectionToLPort, types.DefaultAllowPriority, match, "allow-related", "", "", false, nil) + nodeACL := libovsdbops.BuildACL("", types.DirectionToLPort, types.DefaultAllowPriority, match, "allow-related", "", "", false, 
nil, nil) if err := libovsdbops.AddACLToNodeSwitch(nbClient, nodeName, nodeACL); err != nil { return fmt.Errorf("failed to add acl to allow node sourced traffic err: %v", err) @@ -237,6 +239,7 @@ func namespacePortGroupACLName(namespace, portGroup, name string) string { } func buildACL(namespace, portGroup, name, direction string, priority int, match, action, aclLogging string, policyType knet.PolicyType) *nbdb.ACL { + var options map[string]string aclName := namespacePortGroupACLName(namespace, portGroup, name) log := aclLogging != "" severity := getACLLoggingSeverity(aclLogging) @@ -247,8 +250,13 @@ func buildACL(namespace, portGroup, name, direction string, priority int, match, defaultDenyPolicyTypeACLExtIdKey: string(policyType), } } + if policyType == knet.PolicyTypeEgress { + options = map[string]string{ + "apply-after-lb": "true", + } + } - return libovsdbops.BuildACL(aclName, direction, priority, match, action, meter, severity, log, externalIds) + return libovsdbops.BuildACL(aclName, direction, priority, match, action, meter, severity, log, externalIds, options) } func defaultDenyPortGroup(namespace, gressSuffix string) string { @@ -257,7 +265,7 @@ func defaultDenyPortGroup(namespace, gressSuffix string) string { func buildDenyACLs(namespace, policy, pg, aclLogging string, policyType knet.PolicyType) (denyACL, allowACL *nbdb.ACL) { denyMatch := getACLMatch(pg, "", policyType) - allowMatch := getACLMatch(pg, "arp", policyType) + allowMatch := getACLMatch(pg, "(arp || nd)", policyType) if policyType == knet.PolicyTypeIngress { denyACL = buildACL(namespace, pg, policy, nbdb.ACLDirectionToLport, types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, policyType) allowACL = buildACL(namespace, pg, "ARPallowPolicy", nbdb.ACLDirectionToLport, types.DefaultAllowPriority, allowMatch, nbdb.ACLActionAllow, "", policyType) @@ -453,6 +461,9 @@ func (oc *Controller) createMulticastAllowPolicy(ns string, nsInfo *namespaceInf klog.Warningf("Failed to get pods for namespace %q: %v", ns, err) } for _, pod := range pods { + if util.PodCompleted(pod) { + continue + } portName := util.GetLogicalPortName(pod.Namespace, pod.Name) if portInfo, err := oc.logicalPortCache.get(portName); err != nil { klog.Errorf(err.Error()) @@ -585,8 +596,8 @@ func podAddAllowMulticastPolicy(nbClient libovsdbclient.Client, ns string, portI // podDeleteAllowMulticastPolicy removes the pod's logical switch port from the // namespace's multicast port group. Caller must hold the namespace's // namespaceInfo object lock. -func podDeleteAllowMulticastPolicy(nbClient libovsdbclient.Client, ns string, portInfo *lpInfo) error { - return libovsdbops.DeletePortsFromPortGroup(nbClient, hashedPortGroup(ns), portInfo.uuid) +func podDeleteAllowMulticastPolicy(nbClient libovsdbclient.Client, ns string, portUUID string) error { + return libovsdbops.DeletePortsFromPortGroup(nbClient, hashedPortGroup(ns), portUUID) } // localPodAddDefaultDeny ensures ports (i.e. 
pods) are in the correct @@ -753,6 +764,11 @@ func (oc *Controller) processLocalPodSelectorSetPods(policy *knet.NetworkPolicy, for _, obj := range objs { pod := obj.(*kapi.Pod) + if util.PodCompleted(pod) { + // if pod is completed, do not add it to NP port group + continue + } + getPolicyPortsWg.Add(1) go getPortInfo(pod) } @@ -897,7 +913,12 @@ func (oc *Controller) handleLocalPodSelector( oc.handleLocalPodSelectorDelFunc(policy, np, portGroupIngressDenyName, portGroupEgressDenyName, obj) }, UpdateFunc: func(oldObj, newObj interface{}) { - oc.handleLocalPodSelectorAddFunc(policy, np, portGroupIngressDenyName, portGroupEgressDenyName, newObj) + pod := newObj.(*kapi.Pod) + if util.PodCompleted(pod) { + oc.handleLocalPodSelectorDelFunc(policy, np, portGroupIngressDenyName, portGroupEgressDenyName, oldObj) + } else { + oc.handleLocalPodSelectorAddFunc(policy, np, portGroupIngressDenyName, portGroupEgressDenyName, newObj) + } }, }, func(objs []interface{}) { handleInitialItems(objs) @@ -1384,7 +1405,12 @@ func (oc *Controller) handlePeerPodSelector( oc.handlePeerPodSelectorDelete(gp, obj) }, UpdateFunc: func(oldObj, newObj interface{}) { - oc.handlePeerPodSelectorAddUpdate(gp, newObj) + pod := newObj.(*kapi.Pod) + if util.PodCompleted(pod) { + oc.handlePeerPodSelectorDelete(gp, oldObj) + } else { + oc.handlePeerPodSelectorAddUpdate(gp, newObj) + } }, }, func(objs []interface{}) { oc.handlePeerPodSelectorAddUpdate(gp, objs...) @@ -1424,7 +1450,12 @@ func (oc *Controller) handlePeerNamespaceAndPodSelector( oc.handlePeerPodSelectorDelete(gp, obj) }, UpdateFunc: func(oldObj, newObj interface{}) { - oc.handlePeerPodSelectorAddUpdate(gp, newObj) + pod := oldObj.(*kapi.Pod) + if util.PodCompleted(pod) { + oc.handlePeerPodSelectorDelete(gp, oldObj) + } else { + oc.handlePeerPodSelectorAddUpdate(gp, newObj) + } }, }, func(objs []interface{}) { oc.handlePeerPodSelectorAddUpdate(gp, objs...) 
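Editor's note: two behavioural changes in policy.go above are worth calling out. Egress ACLs now carry the "apply-after-lb" option so they are evaluated after load balancing, and the ARP allow ACLs also match IPv6 neighbor discovery. A small sketch of the option selection, using simplified stand-ins for the knet policy type rather than the real buildACL signature:

package main

import "fmt"

type policyType string

const (
	policyTypeIngress policyType = "Ingress"
	policyTypeEgress  policyType = "Egress"
)

// aclOptions mirrors the new branch in buildACL: only egress ACLs get the
// apply-after-lb option; ingress ACLs keep a nil option map.
func aclOptions(pt policyType) map[string]string {
	if pt == policyTypeEgress {
		return map[string]string{"apply-after-lb": "true"}
	}
	return nil
}

func main() {
	fmt.Println(aclOptions(policyTypeEgress))  // map[apply-after-lb:true]
	fmt.Println(aclOptions(policyTypeIngress)) // map[]
}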
diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index 273c28fb42..7fde141d25 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -107,6 +107,9 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress), }, + map[string]string{ + "apply-after-lb": "true", + }, ) egressDenyACL.UUID = libovsdbops.BuildNamedUUID() @@ -114,7 +117,7 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po networkPolicy.Namespace+"_ARPallowPolicy", egressDirection, types.DefaultAllowPriority, - "inport == @"+pgHash+"_"+egressDenyPG+" && arp", + "inport == @"+pgHash+"_"+egressDenyPG+" && (arp || nd)", nbdb.ACLActionAllow, types.OvnACLLoggingMeter, nbdb.ACLSeverityInfo, @@ -122,6 +125,9 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress), }, + map[string]string{ + "apply-after-lb": "true", + }, ) egressAllowACL.UUID = libovsdbops.BuildNamedUUID() @@ -137,6 +143,7 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress), }, + nil, ) ingressDenyACL.UUID = libovsdbops.BuildNamedUUID() @@ -144,7 +151,7 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po networkPolicy.Namespace+"_ARPallowPolicy", nbdb.ACLDirectionToLport, types.DefaultAllowPriority, - "outport == @"+pgHash+"_"+ingressDenyPG+" && arp", + "outport == @"+pgHash+"_"+ingressDenyPG+" && (arp || nd)", nbdb.ACLActionAllow, types.OvnACLLoggingMeter, nbdb.ACLSeverityInfo, @@ -152,6 +159,7 @@ func (n kNetworkPolicy) getDefaultDenyData(networkPolicy *knet.NetworkPolicy, po map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress), }, + nil, ) ingressAllowACL.UUID = libovsdbops.BuildNamedUUID() @@ -214,6 +222,7 @@ func (n kNetworkPolicy) getPolicyData(networkPolicy *knet.NetworkPolicy, policyP policyTypeACLExtIdKey: policyType, policyType + "_num": strconv.Itoa(i), }, + nil, ) acl.UUID = libovsdbops.BuildNamedUUID() acls = append(acls, acl) @@ -236,6 +245,7 @@ func (n kNetworkPolicy) getPolicyData(networkPolicy *knet.NetworkPolicy, policyP policyTypeACLExtIdKey: policyType, policyType + "_num": strconv.Itoa(i), }, + nil, ) acl.UUID = libovsdbops.BuildNamedUUID() acls = append(acls, acl) @@ -268,6 +278,9 @@ func (n kNetworkPolicy) getPolicyData(networkPolicy *knet.NetworkPolicy, policyP policyTypeACLExtIdKey: policyType, policyType + "_num": strconv.Itoa(i), }, + map[string]string{ + "apply-after-lb": "true", + }, ) acl.UUID = libovsdbops.BuildNamedUUID() acls = append(acls, acl) @@ -290,6 +303,9 @@ func (n kNetworkPolicy) getPolicyData(networkPolicy *knet.NetworkPolicy, policyP policyTypeACLExtIdKey: policyType, policyType + "_num": strconv.Itoa(i), }, + map[string]string{ + "apply-after-lb": "true", + }, ) acl.UUID = libovsdbops.BuildNamedUUID() acls = append(acls, acl) @@ -387,6 +403,9 @@ func (p multicastPolicy) getMulticastPolicyExpectedData(ns string, ports []strin map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeEgress), }, + map[string]string{ + "apply-after-lb": "true", + }, ) egressACL.UUID = libovsdbops.BuildNamedUUID() @@ -402,6 +421,7 @@ func (p multicastPolicy) getMulticastPolicyExpectedData(ns string, ports []strin 
map[string]string{ defaultDenyPolicyTypeACLExtIdKey: string(knet.PolicyTypeIngress), }, + nil, ) ingressACL.UUID = libovsdbops.BuildNamedUUID() @@ -1366,8 +1386,8 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { ) npTest := kNetworkPolicy{} - gressPolicy1ExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, nil, []int32{portNum}, nbdb.ACLSeverityInfo) - defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo) + gressPolicy1ExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, nil, []int32{portNum}, nbdb.ACLSeverityInfo, false) + defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo, false) expectedData := []libovsdb.TestData{} expectedData = append(expectedData, gressPolicy1ExpectedData...) expectedData = append(expectedData, defaultDenyExpectedData...) @@ -1421,7 +1441,7 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { fakeOvn.resetNBClient(connCtx) fakeOvn.controller.RequestRetryPolicy() - gressPolicy2ExpectedData := npTest.getPolicyData(networkPolicy2, []string{nPodTest.portUUID}, nil, []int32{portNum + 1}, nbdb.ACLSeverityInfo) + gressPolicy2ExpectedData := npTest.getPolicyData(networkPolicy2, []string{nPodTest.portUUID}, nil, []int32{portNum + 1}, nbdb.ACLSeverityInfo, false) expectedDataWithPolicy2 := append(expectedData, gressPolicy2ExpectedData...) gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedDataWithPolicy2...)) // check the cache no longer has the entry @@ -1501,8 +1521,8 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { ) npTest := kNetworkPolicy{} - gressPolicy1ExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, nil, []int32{portNum}, nbdb.ACLSeverityInfo) - defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo) + gressPolicy1ExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, nil, []int32{portNum}, nbdb.ACLSeverityInfo, false) + defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo, false) expectedData := []libovsdb.TestData{} expectedData = append(expectedData, gressPolicy1ExpectedData...) expectedData = append(expectedData, defaultDenyExpectedData...) 
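Editor's note: the expected match strings in these tests change from "&& arp" to "&& (arp || nd)", so the default-deny port groups now admit IPv6 neighbor discovery as well as ARP. A short sketch of how such a match string could be assembled; the helper name is illustrative and does not appear in the patch.

package main

import "fmt"

// arpNDAllowMatch builds the allow match used alongside a default-deny
// port group: ingress ACLs match on outport, egress ACLs on inport, and
// both now admit ARP and IPv6 ND.
func arpNDAllowMatch(pgName string, egress bool) string {
	dir := "outport"
	if egress {
		dir = "inport"
	}
	return fmt.Sprintf("%s == @%s && (arp || nd)", dir, pgName)
}

func main() {
	fmt.Println(arpNDAllowMatch("a123_egressDefaultDeny", true))
	fmt.Println(arpNDAllowMatch("a123_ingressDefaultDeny", false))
}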
@@ -1562,8 +1582,8 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace).Create(context.TODO(), networkPolicy2, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gressPolicy2ExpectedData := npTest.getPolicyData(networkPolicy2, []string{nPodTest.portUUID}, nil, []int32{portNum + 1}, nbdb.ACLSeverityInfo) - defaultDenyExpectedData2 := npTest.getDefaultDenyData(networkPolicy2, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo) + gressPolicy2ExpectedData := npTest.getPolicyData(networkPolicy2, []string{nPodTest.portUUID}, nil, []int32{portNum + 1}, nbdb.ACLSeverityInfo, false) + defaultDenyExpectedData2 := npTest.getDefaultDenyData(networkPolicy2, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo, false) expectedData = []libovsdb.TestData{} // FIXME(trozet): libovsdb server doesn't remove referenced ACLs to PG when deleting the PG @@ -2229,8 +2249,8 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { }) npTest := kNetworkPolicy{} - gressPolicyExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, []string{}, nil, nbdb.ACLSeverityInfo) - defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo) + gressPolicyExpectedData := npTest.getPolicyData(networkPolicy, []string{nPodTest.portUUID}, []string{}, nil, nbdb.ACLSeverityInfo, false) + defaultDenyExpectedData := npTest.getDefaultDenyData(networkPolicy, []string{nPodTest.portUUID}, nbdb.ACLSeverityInfo, false) expectedData := []libovsdb.TestData{} expectedData = append(expectedData, gressPolicyExpectedData...) expectedData = append(expectedData, defaultDenyExpectedData...) @@ -2542,7 +2562,7 @@ func buildExpectedACL(gp *gressPolicy, pgName string, as []string) *nbdb.ACL { policyTypeACLExtIdKey: gpDirection, gpDirection + "_num": fmt.Sprintf("%d", gp.idx), } - acl := libovsdbops.BuildACL(name, nbdb.ACLDirectionToLport, types.DefaultAllowPriority, match, nbdb.ACLActionAllowRelated, types.OvnACLLoggingMeter, nbdb.ACLSeverityInfo, true, externalIds) + acl := libovsdbops.BuildACL(name, nbdb.ACLDirectionToLport, types.DefaultAllowPriority, match, nbdb.ACLActionAllowRelated, types.OvnACLLoggingMeter, nbdb.ACLSeverityInfo, true, externalIds, nil) return acl } @@ -2766,7 +2786,7 @@ func generateAllowFromNodeData(nodeName, mgmtIP string) (nodeSwitch *nbdb.Logica match := fmt.Sprintf("%s.src==%s", ipFamily, mgmtIP) - nodeACL := libovsdbops.BuildACL("", types.DirectionToLPort, types.DefaultAllowPriority, match, "allow-related", "", "", false, nil) + nodeACL := libovsdbops.BuildACL("", types.DirectionToLPort, types.DefaultAllowPriority, match, "allow-related", "", "", false, nil, nil) nodeACL.UUID = libovsdbops.BuildNamedUUID() testNode := &nbdb.LogicalSwitch{ diff --git a/go-controller/pkg/ovn/port_cache.go b/go-controller/pkg/ovn/port_cache.go index 8af102ec2b..77746542fe 100644 --- a/go-controller/pkg/ovn/port_cache.go +++ b/go-controller/pkg/ovn/port_cache.go @@ -37,7 +37,8 @@ func (c *portCache) get(logicalPort string) (*lpInfo, error) { c.RLock() defer c.RUnlock() if info, ok := c.cache[logicalPort]; ok { - return info, nil + x := *info + return &x, nil } return nil, fmt.Errorf("logical port %s not found in cache", logicalPort) } diff --git a/go-controller/pkg/ovn/topology_version.go b/go-controller/pkg/ovn/topology_version.go index a3e9753742..90d3e7371c 100644 --- a/go-controller/pkg/ovn/topology_version.go +++ 
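The port_cache hunk above changes `get()` to hand back a copy of the cached `lpInfo` rather than a pointer into the cache, so callers can no longer mutate the cached entry (or observe concurrent mutations) outside the cache's lock. A self-contained sketch of the same copy-on-read pattern, using generic names rather than the actual ovn-kubernetes types; note the copy is shallow, so pointer or slice fields would still be shared:

    package main

    import (
        "fmt"
        "sync"
    )

    type info struct{ Name string }

    type cache struct {
        sync.RWMutex
        m map[string]*info
    }

    // get returns a copy of the cached entry; changes made by the caller
    // cannot leak back into the map that the lock protects.
    func (c *cache) get(key string) (*info, error) {
        c.RLock()
        defer c.RUnlock()
        if v, ok := c.m[key]; ok {
            cp := *v // shallow copy taken while the read lock is held
            return &cp, nil
        }
        return nil, fmt.Errorf("%s not found in cache", key)
    }

    func main() {
        c := &cache{m: map[string]*info{"pod": {Name: "original"}}}
        got, _ := c.get("pod")
        got.Name = "mutated"                   // only the copy changes
        fmt.Println(c.m["pod"].Name, got.Name) // prints: original mutated
    }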
diff --git a/go-controller/pkg/ovn/topology_version.go b/go-controller/pkg/ovn/topology_version.go
index a3e9753742..90d3e7371c 100644
--- a/go-controller/pkg/ovn/topology_version.go
+++ b/go-controller/pkg/ovn/topology_version.go
@@ -52,7 +52,7 @@ func (oc *Controller) reportTopologyVersion(ctx context.Context) error {
         ExternalIDs: logicalRouterRes[0].ExternalIDs,
     }
     opModel := libovsdbops.OperationModel{
-        Name:  logicalRouter.Name,
+        Name:  &logicalRouter.Name,
         Model: &logicalRouter,
         ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return lr.Name == ovntypes.OVNClusterRouter },
         OnModelUpdates: []interface{}{
diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go
index 07378c3be0..83ba372ea2 100644
--- a/go-controller/pkg/util/kube.go
+++ b/go-controller/pkg/util/kube.go
@@ -75,6 +75,7 @@ func newKubernetesRestConfig(conf *config.KubernetesConfig) (*rest.Config, error
         kconfig = &rest.Config{
             Host:            conf.APIServer,
             BearerToken:     conf.Token,
+            BearerTokenFile: conf.TokenFile,
             TLSClientConfig: rest.TLSClientConfig{CAData: conf.CAData},
         }
     } else if strings.HasPrefix(conf.APIServer, "http") {
@@ -216,6 +217,11 @@ func PodWantsNetwork(pod *kapi.Pod) bool {
     return !pod.Spec.HostNetwork
 }
+// PodCompleted checks if the pod is marked as completed (in a terminal state)
+func PodCompleted(pod *kapi.Pod) bool {
+    return pod.Status.Phase == kapi.PodSucceeded || pod.Status.Phase == kapi.PodFailed
+}
+
 // PodScheduled returns if the given pod is scheduled
 func PodScheduled(pod *kapi.Pod) bool {
     return pod.Spec.NodeName != ""
diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go
index 94ced682e8..f3ff9f83fa 100644
--- a/go-controller/pkg/util/util.go
+++ b/go-controller/pkg/util/util.go
@@ -297,7 +297,7 @@ func UpdateNodeSwitchExcludeIPs(nbClient libovsdbclient.Client, nodeName string,
     opModels := []libovsdbops.OperationModel{
         {
-            Name:  logicalSwitchDes.Name,
+            Name:  &logicalSwitchDes.Name,
             Model: &logicalSwitchDes,
             ModelPredicate: func(ls *nbdb.LogicalSwitch) bool { return ls.Name == nodeName },
             OnModelMutations: []interface{}{
diff --git a/go-controller/vendor/github.com/go-logr/logr/funcr/funcr.go b/go-controller/vendor/github.com/go-logr/logr/funcr/funcr.go
index 8ecf434edd..b23ab9679a 100644
--- a/go-controller/vendor/github.com/go-logr/logr/funcr/funcr.go
+++ b/go-controller/vendor/github.com/go-logr/logr/funcr/funcr.go
@@ -126,6 +126,14 @@ type Options struct {
     // called for key-value pairs passed directly to Info and Error. See
     // RenderBuiltinsHook for more details.
     RenderArgsHook func(kvList []interface{}) []interface{}
+
+    // MaxLogDepth tells funcr how many levels of nested fields (e.g. a struct
+    // that contains a struct, etc.) it may log. Every time it finds a struct,
+    // slice, array, or map the depth is increased by one. When the maximum is
+    // reached, the value will be converted to a string indicating that the max
+    // depth has been exceeded. If this field is not specified, a default
+    // value will be used.
+    MaxLogDepth int
 }

 // MessageClass indicates which category or categories of messages to consider.
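The new `Options.MaxLogDepth` field above caps how deeply funcr descends into nested structs, maps, slices and arrays; past the limit it emits a placeholder string instead of recursing further. A small self-contained sketch of how a caller might set it (the field name comes from the hunk above; the payload types and values are illustrative only):

    package main

    import (
        "fmt"

        "github.com/go-logr/logr/funcr"
    )

    type inner struct{ Deep map[string]int }
    type outer struct{ Child inner }

    func main() {
        // Render log lines as JSON through fmt.Println, but stop descending
        // after two levels of nesting; deeper values become a placeholder.
        log := funcr.NewJSON(
            func(obj string) { fmt.Println(obj) },
            funcr.Options{MaxLogDepth: 2},
        )
        log.Info("nested value", "payload", outer{Child: inner{Deep: map[string]int{"a": 1}}})
    }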
@@ -193,11 +201,16 @@ func NewFormatterJSON(opts Options) Formatter {
     return newFormatter(opts, outputJSON)
 }

-const defaultTimestampFmt = "2006-01-02 15:04:05.000000"
+// Defaults for Options.
+const defaultTimestampFormat = "2006-01-02 15:04:05.000000"
+const defaultMaxLogDepth = 16

 func newFormatter(opts Options, outfmt outputFormat) Formatter {
     if opts.TimestampFormat == "" {
-        opts.TimestampFormat = defaultTimestampFmt
+        opts.TimestampFormat = defaultTimestampFormat
+    }
+    if opts.MaxLogDepth == 0 {
+        opts.MaxLogDepth = defaultMaxLogDepth
     }
     f := Formatter{
         outputFormat: outfmt,
@@ -321,7 +334,7 @@ func (f Formatter) flatten(buf *bytes.Buffer, kvList []interface{}, continuing b
 }

 func (f Formatter) pretty(value interface{}) string {
-    return f.prettyWithFlags(value, 0)
+    return f.prettyWithFlags(value, 0, 0)
 }

 const (
@@ -329,7 +342,11 @@ const (
 )

 // TODO: This is not fast. Most of the overhead goes here.
-func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
+func (f Formatter) prettyWithFlags(value interface{}, flags uint32, depth int) string {
+    if depth > f.opts.MaxLogDepth {
+        return `"<max-log-depth-exceeded>"`
+    }
+
     // Handle types that take full control of logging.
     if v, ok := value.(logr.Marshaler); ok {
         // Replace the value with what the type wants to get logged.
@@ -394,7 +411,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
             // arbitrary keys might need escaping
             buf.WriteString(prettyString(v[i].(string)))
             buf.WriteByte(':')
-            buf.WriteString(f.pretty(v[i+1]))
+            buf.WriteString(f.prettyWithFlags(v[i+1], 0, depth+1))
         }
         if flags&flagRawStruct == 0 {
             buf.WriteByte('}')
@@ -464,7 +481,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
                 buf.WriteByte(',')
             }
             if fld.Anonymous && fld.Type.Kind() == reflect.Struct && name == "" {
-                buf.WriteString(f.prettyWithFlags(v.Field(i).Interface(), flags|flagRawStruct))
+                buf.WriteString(f.prettyWithFlags(v.Field(i).Interface(), flags|flagRawStruct, depth+1))
                 continue
             }
             if name == "" {
@@ -475,7 +492,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
             buf.WriteString(name)
             buf.WriteByte('"')
             buf.WriteByte(':')
-            buf.WriteString(f.pretty(v.Field(i).Interface()))
+            buf.WriteString(f.prettyWithFlags(v.Field(i).Interface(), 0, depth+1))
         }
         if flags&flagRawStruct == 0 {
             buf.WriteByte('}')
@@ -488,7 +505,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
                 buf.WriteByte(',')
             }
             e := v.Index(i)
-            buf.WriteString(f.pretty(e.Interface()))
+            buf.WriteString(f.prettyWithFlags(e.Interface(), 0, depth+1))
         }
         buf.WriteByte(']')
         return buf.String()
@@ -513,7 +530,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
             keystr = prettyString(keystr)
         } else {
             // prettyWithFlags will produce already-escaped values
-            keystr = f.prettyWithFlags(it.Key().Interface(), 0)
+            keystr = f.prettyWithFlags(it.Key().Interface(), 0, depth+1)
             if t.Key().Kind() != reflect.String {
                 // JSON only does string keys. Unlike Go's standard JSON, we'll
                 // convert just about anything to a string.
@@ -522,7 +539,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
             }
             buf.WriteString(keystr)
             buf.WriteByte(':')
-            buf.WriteString(f.pretty(it.Value().Interface()))
+            buf.WriteString(f.prettyWithFlags(it.Value().Interface(), 0, depth+1))
             i++
         }
         buf.WriteByte('}')
@@ -531,7 +548,7 @@ func (f Formatter) prettyWithFlags(value interface{}, flags uint32) string {
         if v.IsNil() {
             return "null"
         }
-        return f.pretty(v.Elem().Interface())
+        return f.prettyWithFlags(v.Elem().Interface(), 0, depth)
     }
     return fmt.Sprintf(`"<unhandled-%s>"`, t.Kind().String())
 }
@@ -722,15 +739,16 @@ func (f *Formatter) AddName(name string) {
 func (f *Formatter) AddValues(kvList []interface{}) {
     // Three slice args forces a copy.
     n := len(f.values)
-    vals := f.values[:n:n]
-    vals = append(vals, kvList...)
+    f.values = append(f.values[:n:n], kvList...)
+
+    vals := f.values
     if hook := f.opts.RenderValuesHook; hook != nil {
         vals = hook(f.sanitize(vals))
     }

     // Pre-render values, so we don't have to do it on each Info/Error call.
     buf := bytes.NewBuffer(make([]byte, 0, 1024))
-    f.values = f.flatten(buf, vals, false, true) // escape user-provided keys
+    f.flatten(buf, vals, false, true) // escape user-provided keys
     f.valuesStr = buf.String()
 }
diff --git a/go-controller/vendor/github.com/go-logr/logr/logr.go b/go-controller/vendor/github.com/go-logr/logr/logr.go
index 44cd398c9f..c05482a203 100644
--- a/go-controller/vendor/github.com/go-logr/logr/logr.go
+++ b/go-controller/vendor/github.com/go-logr/logr/logr.go
@@ -43,7 +43,9 @@ limitations under the License.
 //
 // Info() and Error() are very similar, but they are separate methods so that
 // LogSink implementations can choose to do things like attach additional
-// information (such as stack traces) on calls to Error().
+// information (such as stack traces) on calls to Error(). Error() messages are
+// always logged, regardless of the current verbosity. If there is no error
+// instance available, passing nil is valid.
 //
 // Verbosity
 //
@@ -53,6 +55,7 @@ limitations under the License.
 // Log-lines with V-levels that are not enabled (as per the LogSink) will not
 // be written. Level V(0) is the default, and logger.V(0).Info() has the same
 // meaning as logger.Info(). Negative V-levels have the same meaning as V(0).
+// Error messages do not have a verbosity level and are always logged.
 //
 // Where we might have written:
 //    if flVerbose >= 2 {
@@ -253,11 +256,13 @@ func (l Logger) Info(msg string, keysAndValues ...interface{}) {
 // Error logs an error, with the given message and key/value pairs as context.
 // It functions similarly to Info, but may have unique behavior, and should be
 // preferred for logging errors (see the package documentations for more
-// information).
+// information). The log message will always be emitted, regardless of
+// verbosity level.
 //
 // The msg argument should be used to add context to any underlying error,
 // while the err argument should be used to attach the actual error that
-// triggered this log line, if present.
+// triggered this log line, if present. The err parameter is optional
+// and nil may be passed instead of an error instance.
 func (l Logger) Error(err error, msg string, keysAndValues ...interface{}) {
     if withHelper, ok := l.sink.(CallStackHelperLogSink); ok {
         withHelper.GetCallStackHelper()()
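The logr documentation changes above make two points explicit: Error() output bypasses verbosity filtering, and a nil error is acceptable when there is no error value to attach. A tiny self-contained illustration of both call shapes, using stdr (also bumped in this diff) purely as a convenient sink; the messages and key/value pairs are made up:

    package main

    import (
        "errors"
        "log"
        "os"

        "github.com/go-logr/stdr"
    )

    func main() {
        logger := stdr.New(log.New(os.Stderr, "", log.LstdFlags))

        // Emitted regardless of the configured verbosity level.
        logger.Error(errors.New("connection refused"), "failed to reach apiserver", "attempt", 3)

        // Also valid: no underlying error instance, so nil is passed instead.
        logger.Error(nil, "invalid configuration detected", "field", "gateway-mode")
    }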
diff --git a/go-controller/vendor/github.com/go-logr/stdr/README.md b/go-controller/vendor/github.com/go-logr/stdr/README.md
index 3faf46dfd9..5158667890 100644
--- a/go-controller/vendor/github.com/go-logr/stdr/README.md
+++ b/go-controller/vendor/github.com/go-logr/stdr/README.md
@@ -1,4 +1,6 @@
 # Minimal Go logging using logr and Go's standard library
+[![Go Reference](https://pkg.go.dev/badge/github.com/go-logr/stdr.svg)](https://pkg.go.dev/github.com/go-logr/stdr)
+
 This package implements the [logr interface](https://github.com/go-logr/logr)
-in terms of Go's standard log package(https://godoc.org/std/log).
+in terms of Go's standard log package(https://pkg.go.dev/log).
diff --git a/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go b/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go
index 5199a4b319..bbb158fc25 100644
--- a/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go
+++ b/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go
@@ -357,7 +357,7 @@ func (r *RowCache) RowsByCondition(conditions []ovsdb.Condition) (map[string]mod
                 results[rowUUID] = row
             }
         }
-    } else if index, err := r.Index(condition.Column); err != nil {
+    } else if index, err := r.Index(condition.Column); err == nil {
         for k, rowUUID := range index {
             tSchema := schema.Columns[condition.Column]
             nativeValue, err := ovsdb.OvsToNative(tSchema, condition.Value)
@@ -383,7 +383,12 @@ func (r *RowCache) RowsByCondition(conditions []ovsdb.Condition) (map[string]mod
             if err != nil {
                 return nil, err
             }
-            ok, err := condition.Function.Evaluate(value, condition.Value)
+            tSchema := schema.Columns[condition.Column]
+            nativeValue, err := ovsdb.OvsToNative(tSchema, condition.Value)
+            if err != nil {
+                return nil, err
+            }
+            ok, err := condition.Function.Evaluate(value, nativeValue)
             if err != nil {
                 return nil, err
             }
diff --git a/go-controller/vendor/github.com/ovn-org/libovsdb/server/database.go b/go-controller/vendor/github.com/ovn-org/libovsdb/server/database.go
index 1edd1f79c1..0c0e55de69 100644
--- a/go-controller/vendor/github.com/ovn-org/libovsdb/server/database.go
+++ b/go-controller/vendor/github.com/ovn-org/libovsdb/server/database.go
@@ -67,7 +67,7 @@ func (db *inMemoryDatabase) Commit(database string, id uuid.UUID, updates ovsdb.
     }
     db.mutex.RLock()
     targetDb := db.databases[database]
-    db.mutex.RLock()
+    db.mutex.RUnlock()
     return targetDb.Populate2(updates)
 }
@@ -77,7 +77,7 @@ func (db *inMemoryDatabase) CheckIndexes(database string, table string, m model.
     }
     db.mutex.RLock()
     targetDb := db.databases[database]
-    db.mutex.RLock()
+    db.mutex.RUnlock()
     targetTable := targetDb.Table(table)
     return targetTable.IndexExists(m)
 }
@@ -88,7 +88,7 @@ func (db *inMemoryDatabase) List(database, table string, conditions ...ovsdb.Con
     }
     db.mutex.RLock()
     targetDb := db.databases[database]
-    db.mutex.RLock()
+    db.mutex.RUnlock()
     targetTable := targetDb.Table(table)
     if targetTable == nil {
@@ -104,7 +104,7 @@ func (db *inMemoryDatabase) Get(database, table string, uuid string) (model.Mode
     }
     db.mutex.RLock()
     targetDb := db.databases[database]
-    db.mutex.RLock()
+    db.mutex.RUnlock()
     targetTable := targetDb.Table(table)
     if targetTable == nil {
diff --git a/go-controller/vendor/github.com/ovn-org/libovsdb/server/server.go b/go-controller/vendor/github.com/ovn-org/libovsdb/server/server.go
index 19c037a4b8..9be6886c9f 100644
--- a/go-controller/vendor/github.com/ovn-org/libovsdb/server/server.go
+++ b/go-controller/vendor/github.com/ovn-org/libovsdb/server/server.go
@@ -2,6 +2,7 @@ package server

 import (
     "encoding/json"
+    "errors"
     "fmt"
     "log"
     "net"
@@ -31,6 +32,7 @@ type OvsdbServer struct {
     monitors     map[*rpc2.Client]*connectionMonitors
     monitorMutex sync.RWMutex
     logger       logr.Logger
+    txnMutex     sync.Mutex
 }

 // NewOvsdbServer returns a new OvsdbServer
@@ -187,6 +189,12 @@ func (o *OvsdbServer) NewTransaction(model model.DatabaseModel, dbName string, d

 // Transact issues a new database transaction and returns the results
 func (o *OvsdbServer) Transact(client *rpc2.Client, args []json.RawMessage, reply *[]ovsdb.OperationResult) error {
+    // While allowing other rpc handlers to run in parallel, this ovsdb server expects transactions
+    // to be serialized. The following mutex ensures that.
+    // Ref: https://github.com/cenkalti/rpc2/blob/c1acbc6ec984b7ae6830b6a36b62f008d5aefc4c/client.go#L187
+    o.txnMutex.Lock()
+    defer o.txnMutex.Unlock()
+
     if len(args) < 2 {
         return fmt.Errorf("not enough args")
     }
@@ -229,6 +237,12 @@ func (o *OvsdbServer) Transact(client *rpc2.Client, args []json.RawMessage, repl
     }
     response, updates := o.transact(db, ops)
     *reply = response
+    for i, operResult := range response {
+        if operResult.Error != "" {
+            o.logger.Error(errors.New("failed to process operation"), "Skipping transaction DB commit due to error", "operations", ops, "failed operation", ops[i], "operation error", operResult.Error)
+            return nil
+        }
+    }
     transactionID := uuid.New()
     o.processMonitors(transactionID, updates)
     return o.db.Commit(db, transactionID, updates)
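The server.go hunks above do two things: they serialize Transact calls with `txnMutex` (the underlying rpc2 library dispatches handlers concurrently), and they skip the database commit whenever any operation result carries an error, relaying the failure through the reply instead. A minimal self-contained sketch of that serialize-then-validate pattern, with generic stand-in types rather than the real ovsdb ones:

    package main

    import (
        "fmt"
        "sync"
    )

    type result struct{ Error string }

    type server struct {
        // txnMutex serializes transactions even though the RPC layer may
        // run handlers concurrently, mirroring the hunk above.
        txnMutex sync.Mutex
    }

    // transact stands in for per-operation processing; "bad" operations fail.
    func (s *server) transact(ops []string) []result {
        out := make([]result, len(ops))
        for i, op := range ops {
            if op == "bad" {
                out[i] = result{Error: "constraint violation"}
            }
        }
        return out
    }

    func (s *server) Transact(ops []string) []result {
        s.txnMutex.Lock()
        defer s.txnMutex.Unlock()

        resp := s.transact(ops)
        for i, r := range resp {
            if r.Error != "" {
                // Report the failure via the results but skip the commit,
                // as the patched Transact does.
                fmt.Printf("skipping commit: operation %d failed: %s\n", i, r.Error)
                return resp
            }
        }
        // commit(resp) would run here once every operation succeeded
        return resp
    }

    func main() {
        s := &server{}
        _ = s.Transact([]string{"ok", "bad"})
    }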
diff --git a/go-controller/vendor/github.com/ovn-org/libovsdb/server/transact.go b/go-controller/vendor/github.com/ovn-org/libovsdb/server/transact.go
index 26aa541172..33be01ee51 100644
--- a/go-controller/vendor/github.com/ovn-org/libovsdb/server/transact.go
+++ b/go-controller/vendor/github.com/ovn-org/libovsdb/server/transact.go
@@ -98,11 +98,11 @@ func (o *OvsdbServer) transact(name string, operations []ovsdb.Operation) ([]ovs
 func (t *Transaction) rowsFromTransactionCacheAndDatabase(table string, where []ovsdb.Condition) (map[string]model.Model, error) {
     txnRows, err := t.Cache.Table(table).RowsByCondition(where)
     if err != nil {
-        return nil, fmt.Errorf("failed getting rows for table %s from transaction cache", table)
+        return nil, fmt.Errorf("failed getting rows for table %s from transaction cache: %v", table, err)
     }
     rows, err := t.Database.List(t.DbName, table, where...)
     if err != nil {
-        return nil, fmt.Errorf("failed getting rows for table %s from database", table)
+        return nil, fmt.Errorf("failed getting rows for table %s from database: %v", table, err)
     }
     // prefer rows from transaction cache while copying into cache
diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt
index c69450fec8..72834364a2 100644
--- a/go-controller/vendor/modules.txt
+++ b/go-controller/vendor/modules.txt
@@ -88,11 +88,11 @@ github.com/evanphx/json-patch
 # github.com/fsnotify/fsnotify v1.4.9
 ## explicit; go 1.13
 github.com/fsnotify/fsnotify
-# github.com/go-logr/logr v1.2.0
+# github.com/go-logr/logr v1.2.2
 ## explicit; go 1.16
 github.com/go-logr/logr
 github.com/go-logr/logr/funcr
-# github.com/go-logr/stdr v1.1.0
+# github.com/go-logr/stdr v1.2.2
 ## explicit; go 1.16
 github.com/go-logr/stdr
 # github.com/gogo/protobuf v1.3.2 => github.com/gogo/protobuf v1.3.2
@@ -232,7 +232,7 @@ github.com/openshift/client-go/cloudnetwork/informers/externalversions/cloudnetw
 github.com/openshift/client-go/cloudnetwork/informers/externalversions/cloudnetwork/v1
 github.com/openshift/client-go/cloudnetwork/informers/externalversions/internalinterfaces
 github.com/openshift/client-go/cloudnetwork/listers/cloudnetwork/v1
-# github.com/ovn-org/libovsdb v0.6.1-0.20220225160119-c8b4494412b1
+# github.com/ovn-org/libovsdb v0.6.1-0.20220328142833-2cbe2d093e12
 ## explicit; go 1.16
 github.com/ovn-org/libovsdb/cache
 github.com/ovn-org/libovsdb/client
diff --git a/test/scripts/e2e-kind.sh b/test/scripts/e2e-kind.sh
index 43bff6568c..45314f6ab8 100755
--- a/test/scripts/e2e-kind.sh
+++ b/test/scripts/e2e-kind.sh
@@ -20,7 +20,6 @@ DisruptionController
 # FEATURES NOT AVAILABLE IN OUR CI ENVIRONMENT
 \[Feature:Federation\]
 should have ipv4 and ipv6 internal node ip
-should have ipv4 and ipv6 node podCIDRs
 # TESTS THAT ASSUME KUBE-PROXY
 kube-proxy
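The transact.go change above appends the underlying error to the wrapper message via %v, so the root cause is no longer discarded. A tiny self-contained sketch of the difference, including the %w variant as a possible alternative (not what the diff uses) that additionally keeps the cause inspectable with errors.Is:

    package main

    import (
        "errors"
        "fmt"
    )

    var errNoTable = errors.New("table does not exist")

    // list stands in for t.Database.List and always fails here.
    func list(table string) error { return errNoTable }

    func main() {
        table := "Logical_Switch"
        if err := list(table); err != nil {
            // %v embeds the cause in the message, as in the diff above.
            wrappedV := fmt.Errorf("failed getting rows for table %s from database: %v", table, err)
            // %w also preserves the cause for errors.Is / errors.As.
            wrappedW := fmt.Errorf("failed getting rows for table %s from database: %w", table, err)
            fmt.Println(wrappedV)
            fmt.Println(errors.Is(wrappedW, errNoTable)) // true
        }
    }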