Improve e2e test reliability (#2580)
Signed-off-by: jorturfer <[email protected]>
Jorge Turrado Ferrero authored Feb 11, 2022
1 parent efca71d commit 130bc93
Showing 27 changed files with 696 additions and 497 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -34,7 +34,7 @@

### Improvements

- TODO ([#XXX](https://github.com/kedacore/keda/issue/XXX))
- Improve e2e tests reliability ([#2580](https://github.com/kedacore/keda/issues/2580))

### Breaking Changes

621 changes: 431 additions & 190 deletions config/crd/bases/keda.sh_scaledjobs.yaml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/package.json
@@ -9,7 +9,7 @@
"require": [
"ts-node/register"
],
"timeout": "10m"
"timeout": "30m"
},
"scripts": {
"test": "ava"
40 changes: 37 additions & 3 deletions tests/run-all.sh
@@ -6,7 +6,7 @@ E2E_REGEX=${E2E_TEST_REGEX:-*.test.ts}
DIR=$(dirname "$0")
cd $DIR

concurrent_tests_limit=5
concurrent_tests_limit=6
pids=()
lookup=()
failed_count=0
@@ -23,18 +23,52 @@ function run_tests {
for test_case in $(find scalers -name "$E2E_REGEX" | shuf)
do
counter=$((counter+1))
./node_modules/.bin/ava $test_case > "${test_case}.log" 2>&1 &
./node_modules/.bin/ava $test_case > "${test_case}.1.log" 2>&1 &
pid=$!
echo "Running $test_case with pid: $pid"
pids+=($pid)
lookup[$pid]=$test_case
# limit concurrent runs
if [[ "$counter" -gt "$concurrent_tests_limit" ]]; then
if [[ "$counter" -ge "$concurrent_tests_limit" ]]; then
wait_for_jobs
counter=0
pids=()
fi
done

wait_for_jobs

# Retry failing tests
if [ ${#failed_lookup[@]} -ne 0 ]; then

printf "\n\n##############################################\n"
printf "##############################################\n\n"
printf "FINISHED FIRST EXECUTION, RETRYING FAILING TESTS"
printf "\n\n##############################################\n"
printf "##############################################\n\n"

retry_lookup=("${failed_lookup[@]}")
counter=0
pids=()
failed_count=0
failed_lookup=()

for test_case in "${retry_lookup[@]}"
do
counter=$((counter+1))
./node_modules/.bin/ava $test_case > "${test_case}.2.log" 2>&1 &
pid=$!
echo "Rerunning $test_case with pid: $pid"
pids+=($pid)
lookup[$pid]=$test_case
# limit concurrent runs
if [[ "$counter" -ge "$concurrent_tests_limit" ]]; then
wait_for_jobs
counter=0
pids=()
fi
done
fi
}

function mark_failed {
6 changes: 0 additions & 6 deletions tests/scalers/activemq.test.ts
@@ -135,12 +135,6 @@ spec:
name: mqtt
protocol: TCP
resources:
requests:
memory: 500Mi
cpu: 200m
limits:
memory: 1000Mi
cpu: 400m
volumeMounts:
- name: activemq-config
mountPath: /opt/apache-activemq-5.16.3/webapps/api/WEB-INF/classes/jolokia-access.xml
12 changes: 6 additions & 6 deletions tests/scalers/argo-rollouts.test.ts
@@ -74,31 +74,31 @@ test.serial(`Rollouts should scale to 5 (the max) with HTTP Requests exceeding i

// keda based rollout should start scaling up with http requests issued
let replicaCount = '0'
for (let i = 0; i < 60 && replicaCount !== '5'; i++) {
t.log(`Waited ${5 * i} seconds for prometheus-based rollout to scale up`)
for (let i = 0; i < 120 && replicaCount !== '5'; i++) {
t.log(`Waited ${10 * i} seconds for prometheus-based rollout to scale up`)
const jobLogs = sh.exec(`kubectl logs -l job-name=generate-requests -n ${testNamespace}`).stdout
t.log(`Logs from the generate requests: ${jobLogs}`)

replicaCount = sh.exec(
`kubectl get rollouts.argoproj.io/keda-test-app --namespace ${testNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '5') {
await sleep(5000)
await sleep(10000)
}
}

t.is('5', replicaCount, 'Replica count should be maxed at 5')

for (let i = 0; i < 50 && replicaCount !== '0'; i++) {
for (let i = 0; i < 90 && replicaCount !== '0'; i++) {
replicaCount = sh.exec(
`kubectl get rollouts.argoproj.io/keda-test-app --namespace ${testNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '0') {
await sleep(5000)
await sleep(10000)
}
}

t.is('0', replicaCount, 'Replica count should be 0 after 3 minutes')
t.is('0', replicaCount, 'Replica count should be 0 after 15 minutes')
})

test.after.always.cb('clean up argo-rollouts testing deployment', t => {
3 changes: 0 additions & 3 deletions tests/scalers/artemis-helpers.ts
@@ -159,9 +159,6 @@ spec:
image: docker.io/vromero/activemq-artemis:2.6.2
imagePullPolicy:
resources:
requests:
cpu: 100m
memory: 256Mi
env:
- name: ARTEMIS_PASSWORD
valueFrom:
59 changes: 32 additions & 27 deletions tests/scalers/azure-pipelines.test.ts
@@ -15,6 +15,8 @@ const projectName = process.env['AZURE_DEVOPS_PROJECT']
const buildDefinitionID = process.env['AZURE_DEVOPS_BUILD_DEFINITON_ID']
const poolName = process.env['AZURE_DEVOPS_POOL_NAME']

let poolID: number

test.before(async t => {
if (!organizationURL || !personalAccessToken || !projectName || !buildDefinitionID || !poolName) {
t.fail('AZURE_DEVOPS_ORGANIZATION_URL, AZURE_DEVOPS_PAT, AZURE_DEVOPS_PROJECT, AZURE_DEVOPS_BUILD_DEFINITON_ID and AZURE_DEVOPS_POOL_NAME environment variables are required for azure pipelines tests')
@@ -25,7 +27,7 @@ test.before(async t => {

let taskAgent: ta.ITaskAgentApiBase = await connection.getTaskAgentApi();
let agentPool: ti.TaskAgentPool[] = await taskAgent.getAgentPools(poolName)
let poolID: number = agentPool[0].id
poolID = agentPool[0].id

if(!poolID) {
t.fail("failed to convert poolName to poolID")
@@ -40,38 +42,43 @@ test.before(async t => {
.replace('{{AZP_URL}}', organizationURL))
sh.exec(`kubectl create namespace ${defaultNamespace}`)
t.is(0, sh.exec(`kubectl apply -f ${deployFile.name} --namespace ${defaultNamespace}`).code, 'creating a deployment should work.')
})

test.serial('Deployment should have 1 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should start out as 1')
})


test.serial('Deployment should have 0 replicas after scale', async t => {
// wait for the first agent to be registered in the agent pool
await sleep(20 * 1000)

const scaledObjectFile = tmp.fileSync()
fs.writeFileSync(scaledObjectFile.name, poolIdScaledObject
.replace('{{AZP_POOL_ID}}', poolID.toString()))
t.is(0, sh.exec(`kubectl apply -f ${scaledObjectFile.name} --namespace ${defaultNamespace}`).code, 'creating ScaledObject with poolId should work.')
})

test.serial('Deployment should have 1 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should start out as 1')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 if no pending jobs')
})

test.serial('PoolID: Deployment should scale to 3 replicas after queueing 3 jobs', async t => {

test.serial('PoolID: Deployment should scale to 1 replica after queueing job', async t => {
let authHandler = azdev.getPersonalAccessTokenHandler(personalAccessToken);
let connection = new azdev.WebApi(organizationURL, authHandler);
let build: ba.IBuildApi = await connection.getBuildApi();
var definitionID = parseInt(buildDefinitionID)

// wait for the first agent to be registered in the agent pool
await sleep(20 * 1000)
await build.queueBuild(null, projectName, null, null, null, definitionID)

for(let i = 0; i < 3; i++) {
await build.queueBuild(null, projectName, null, null, null, definitionID)
}

t.true(await waitForDeploymentReplicaCount(3, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 3 after starting 3 jobs')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 1 after starting a job')
})

test.serial('PoolID: Deployment should scale to 1 replica after finishing 3 jobs', async t => {
test.serial('PoolID: Deployment should scale to 0 replicas after finishing job', async t => {
// wait 10 minutes for the jobs to finish and scale down
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 60, 10000), 'replica count should be 1 after finishing 3 jobs')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 10000), 'replica count should be 0 after finishing')
})

test.serial('PoolName: Deployment should scale to 3 replicas after queueing 3 jobs', async t => {
test.serial('PoolName: Deployment should scale to 1 replica after queueing job', async t => {
const poolNameScaledObjectFile = tmp.fileSync()
fs.writeFileSync(poolNameScaledObjectFile.name, poolNameScaledObject
.replace('{{AZP_POOL}}', poolName))
@@ -82,16 +89,14 @@ test.serial('PoolName: Deployment should scale to 3 replicas after queueing 3 jo
let build: ba.IBuildApi = await connection.getBuildApi();
var definitionID = parseInt(buildDefinitionID)

for(let i = 0; i < 3; i++) {
await build.queueBuild(null, projectName, null, null, null, definitionID)
}
await build.queueBuild(null, projectName, null, null, null, definitionID)

t.true(await waitForDeploymentReplicaCount(3, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 3 after starting 3 jobs')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 1 after starting a job')
})

test.serial('PoolName: should scale to 1 replica after finishing 3 jobs', async t => {
test.serial('PoolName: should scale to 0 replicas after finishing job', async t => {
// wait 10 minutes for the jobs to finish and scale down
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 60, 10000), 'replica count should be 1 after finishing 3 jobs')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 10000), 'replica count should be 0 after finishing')
})

test.after.always('clean up azure-pipelines deployment', t => {
@@ -157,9 +162,9 @@ metadata:
spec:
scaleTargetRef:
name: test-deployment
minReplicaCount: 1
maxReplicaCount: 3
pollingInterval: 50
minReplicaCount: 0
maxReplicaCount: 1
pollingInterval: 30
cooldownPeriod: 60
advanced:
horizontalPodAutoscalerConfig:
@@ -179,9 +184,9 @@ metadata:
spec:
scaleTargetRef:
name: test-deployment
minReplicaCount: 1
maxReplicaCount: 3
pollingInterval: 50
minReplicaCount: 0
maxReplicaCount: 1
pollingInterval: 30
cooldownPeriod: 60
advanced:
horizontalPodAutoscalerConfig:
48 changes: 20 additions & 28 deletions tests/scalers/azure-queue-restore-original-replicas.test.ts
@@ -1,16 +1,23 @@
import * as azure from 'azure-storage'
import * as fs from 'fs'
import * as sh from 'shelljs'
import * as tmp from 'tmp'
import test from 'ava'
import {waitForDeploymentReplicaCount} from "./helpers";

const defaultNamespace = 'azure-queue-restore-original-replicas-test'
const queueName = 'queue-name-restore'
const connectionString = process.env['TEST_STORAGE_CONNECTION_STRING']

test.before(t => {
if (!connectionString) {
t.fail('TEST_STORAGE_CONNECTION_STRING environment variable is required for queue tests')
}

const queueSvc = azure.createQueueService(connectionString)
queueSvc.messageEncoder = new azure.QueueMessageEncoder.TextBase64QueueMessageEncoder()
queueSvc.createQueueIfNotExists(queueName, _ => {})

sh.config.silent = true
const base64ConStr = Buffer.from(connectionString).toString('base64')
const tmpFile = tmp.fileSync()
@@ -23,11 +30,8 @@ test.before(t => {
)
})

test.serial('Deployment should have 2 replicas on start', t => {
const replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
t.is(replicaCount, '2', 'replica count should start out as 2')
test.serial('Deployment should have 2 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(2, 'test-deployment', defaultNamespace, 15, 1000), 'replica count should be 2 after 15 seconds')
})

test.serial('Creating ScaledObject should work', t => {
@@ -44,34 +48,16 @@ test.serial('Creating ScaledObject should work', t => {

test.serial(
'Deployment should scale to 0 and then shold be back to 2 after deletion of ScaledObject',
t => {
let replicaCount = '100'
for (let i = 0; i < 50 && replicaCount !== '0'; i++) {
replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '0') {
sh.exec('sleep 5s')
}
}
t.is('0', replicaCount, 'Replica count should be 0')

async t => {
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 after 2 minutes')

t.is(
0,
sh.exec(`kubectl delete scaledobject.keda.sh/test-scaledobject --namespace ${defaultNamespace}`).code,
'deletion of ScaledObject should work.'
)

for (let i = 0; i < 50 && replicaCount !== '2'; i++) {
replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '2') {
sh.exec('sleep 5s')
}
}
t.is('2', replicaCount, 'Replica count should be back at orignal 2')
t.true(await waitForDeploymentReplicaCount(2, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 2 after 2 minutes')
}
)

@@ -86,7 +72,13 @@ test.after.always.cb('clean up azure-queue deployment', t => {
sh.exec(`kubectl delete ${resource} --namespace ${defaultNamespace}`)
}
sh.exec(`kubectl delete namespace ${defaultNamespace}`)
t.end()

// delete test queue
const queueSvc = azure.createQueueService(connectionString)
queueSvc.deleteQueueIfExists(queueName, err => {
t.falsy(err, 'should delete test queue successfully')
t.end()
})
})

const deployYaml = `apiVersion: v1
@@ -145,5 +137,5 @@ spec:
triggers:
- type: azure-queue
metadata:
queueName: queue-name
queueName: ${queueName}
connectionFromEnv: AzureWebJobsStorage`
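
Note: several of the rewritten tests above swap hand-rolled kubectl polling loops for the sleep and waitForDeploymentReplicaCount helpers imported from ./helpers. The real implementation lives in tests/scalers/helpers.ts and is not part of this diff; the sketch below only illustrates what such a poller can look like, with the (target, name, namespace, iterations, interval) signature inferred from the call sites above. The actual helper may differ.

import * as sh from 'shelljs'

// Sketch only: the real helpers.ts may differ in names and details.
export function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms))
}

// Poll kubectl until the deployment reports the desired replica count,
// checking up to `iterations` times with `interval` milliseconds between checks.
export async function waitForDeploymentReplicaCount(
  target: number,
  name: string,
  namespace: string,
  iterations = 10,
  interval = 3000
): Promise<boolean> {
  for (let i = 0; i < iterations; i++) {
    const replicaCount = sh.exec(
      `kubectl get deployment.apps/${name} --namespace ${namespace} -o jsonpath="{.spec.replicas}"`
    ).stdout
    if (replicaCount === target.toString()) {
      return true
    }
    await sleep(interval)
  }
  return false
}

Returning a boolean lets each test keep the assertion and its message together, as in t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 after 2 minutes').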
7 changes: 5 additions & 2 deletions tests/scalers/azure-queue-trigger-auth.test.ts
@@ -7,7 +7,7 @@ import test from 'ava'
import {waitForDeploymentReplicaCount} from "./helpers";

const testNamespace = 'azure-queue-auth-test'
const queueName = 'queue-name'
const queueName = 'queue-name-trigger'
const connectionString = process.env['TEST_STORAGE_CONNECTION_STRING']

test.before(async t => {
@@ -44,7 +44,10 @@ test.serial(
)

// Scaling out when messages available
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', testNamespace, 60, 1000), 'replica count should be 3 after 1 minute')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', testNamespace, 60, 1000), 'replica count should be 1 after 1 minute')

queueSvc.clearMessages(queueName, _ => {})

// Scaling in when no available messages
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', testNamespace, 300, 1000), 'replica count should be 0 after 5 minute')
}
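
Note: the azure-storage queue operations used in these tests (createQueueIfNotExists, clearMessages, deleteQueueIfExists) are callback-based, which is why the cleanup hook above has to call t.end() inside the deleteQueueIfExists callback. Below is a minimal promise-wrapper sketch, assuming the legacy azure-storage SDK's error-first callbacks; the resetQueue helper name is hypothetical and not part of this commit.

import * as azure from 'azure-storage'

// Hypothetical helper, not part of this commit: adapts the error-first
// callbacks of the legacy azure-storage QueueService to async/await.
function queueOp(fn: (cb: (err: Error | null) => void) => void): Promise<void> {
  return new Promise((resolve, reject) => fn(err => (err ? reject(err) : resolve())))
}

// Ensure a test queue exists and starts empty, so leftover messages from a
// previous run cannot keep the deployment scaled out.
async function resetQueue(connectionString: string, queueName: string): Promise<void> {
  const queueSvc = azure.createQueueService(connectionString)
  queueSvc.messageEncoder = new azure.QueueMessageEncoder.TextBase64QueueMessageEncoder()
  await queueOp(cb => queueSvc.createQueueIfNotExists(queueName, cb))
  await queueOp(cb => queueSvc.clearMessages(queueName, cb))
}

Using distinct queue names per test file (queue-name-restore, queue-name-trigger) plus this kind of setup and teardown keeps parallel test runs from interfering with each other through a shared queue.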