From 13e5f28c30e2384bc6f1dab7529f9f678fea6f7c Mon Sep 17 00:00:00 2001 From: viktor-kurchenko Date: Mon, 13 May 2024 08:20:48 +0300 Subject: [PATCH] clustermesh: setup fix and CI ClusterMesh connectivity tests setup fixed to support test-concurrency param. Kind workflow (clustermesh part) updated with test-concurrency. Signed-off-by: viktor-kurchenko --- .github/workflows/kind.yaml | 26 ++++++++++++++++++-------- connectivity/check/context.go | 6 ++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/kind.yaml b/.github/workflows/kind.yaml index 73d8ff4cf6..add10f0104 100644 --- a/.github/workflows/kind.yaml +++ b/.github/workflows/kind.yaml @@ -201,6 +201,14 @@ jobs: helm-upgrade-clustermesh: name: Kind Helm Upgrade Clustermesh runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: + # run connectivity tests explicitly without concurrency + - test-concurrency: 1 + # run connectivity tests concurrently + - test-concurrency: 5 timeout-minutes: 50 env: @@ -326,19 +334,21 @@ jobs: # # Dispatch interval is set to 100ms, b/c otherwise (default is 0), the flow validation might time out. cilium connectivity test --context $CLUSTER1 --multi-cluster $CLUSTER2 --debug \ + --test-concurrency=${{ matrix.test-concurrency }} \ --conn-disrupt-dispatch-interval 100ms \ --include-conn-disrupt-test --conn-disrupt-test-setup cilium connectivity test --context $CLUSTER1 --multi-cluster $CLUSTER2 --debug \ + --test-concurrency=${{ matrix.test-concurrency }} \ --include-unsafe-tests --include-conn-disrupt-test \ - --collect-sysdump-on-failure --junit-file cilium-junit-clustermesh-1.xml \ + --collect-sysdump-on-failure --junit-file cilium-junit-clustermesh-1-concurrency-${{ matrix.test-concurrency }}.xml \ --junit-property mode=clustermesh --junit-property type=ipsec - name: Upload JUnit if: ${{ always() }} uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: - name: cilium-junits-helm-upgrade-clustermesh + name: cilium-junits-helm-upgrade-clustermesh-concurrency-${{ matrix.test-concurrency }} path: cilium-junit*.xml retention-days: 2 @@ -347,24 +357,24 @@ jobs: run: | cilium --context $CLUSTER1 status kubectl --context $CLUSTER1 get pods --all-namespaces -o wide - cilium --context $CLUSTER1 sysdump --output-filename cilium-sysdump-out-c1 + cilium --context $CLUSTER1 sysdump --output-filename cilium-sysdump-out-c1-concurrency-${{ matrix.test-concurrency }} cilium --context $CLUSTER2 status kubectl --context $CLUSTER2 get pods --all-namespaces -o wide - cilium --context $CLUSTER2 sysdump --output-filename cilium-sysdump-out-c2 + cilium --context $CLUSTER2 sysdump --output-filename cilium-sysdump-out-c2-concurrency-${{ matrix.test-concurrency }} shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently - name: Upload sysdump from cluster 1 if: ${{ !success() }} uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: - name: cilium-sysdump-out-c1.zip - path: cilium-sysdump-out-c1.zip + name: cilium-sysdump-out-c1-concurrency-${{ matrix.test-concurrency }}.zip + path: cilium-sysdump-out-c1-concurrency-${{ matrix.test-concurrency }}.zip retention-days: 5 - name: Upload sysdump from cluster 2 if: ${{ !success() }} uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: - name: cilium-sysdump-out-c2.zip - path: cilium-sysdump-out-c2.zip + name: cilium-sysdump-out-c2-concurrency-${{ matrix.test-concurrency }}.zip + path: cilium-sysdump-out-c2-concurrency-${{ matrix.test-concurrency }}.zip retention-days: 5 diff --git a/connectivity/check/context.go b/connectivity/check/context.go index be37f9498f..ad2d7877bc 100644 --- a/connectivity/check/context.go +++ b/connectivity/check/context.go @@ -781,6 +781,10 @@ func (ct *ConnectivityTest) modifyStaticRoutesForNodesWithoutCilium(ctx context. return nil } +// multiClusterClientLock protects K8S client instantiation (Scheme registration) +// for the cluster mesh setup in case of connectivity test concurrency > 1 +var multiClusterClientLock = sync.Mutex{} + // initClients checks if Cilium is installed on the cluster, whether the cluster // has multiple nodes, and whether or not monitor aggregation is enabled. // TODO(timo): Split this up, it does a lot. @@ -833,6 +837,8 @@ func (ct *ConnectivityTest) initClients(ctx context.Context) error { ct.params.SingleNode = true } } else if ct.params.MultiCluster != "" { + multiClusterClientLock.Lock() + defer multiClusterClientLock.Unlock() dst, err := k8s.NewClient(ct.params.MultiCluster, "", ct.params.CiliumNamespace) if err != nil { return fmt.Errorf("unable to create Kubernetes client for remote cluster %q: %w", ct.params.MultiCluster, err)