Improve e2e test reliability (#2580)
Signed-off-by: jorturfer <[email protected]>
Jorge Turrado Ferrero authored Feb 11, 2022
1 parent efca71d commit 130bc93
Showing 27 changed files with 696 additions and 497 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -34,7 +34,7 @@

### Improvements

- TODO ([#XXX](https://github.com/kedacore/keda/issue/XXX))
- Improve e2e tests reliability ([#2580](https://github.com/kedacore/keda/issues/2580))

### Breaking Changes

621 changes: 431 additions & 190 deletions config/crd/bases/keda.sh_scaledjobs.yaml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/package.json
@@ -9,7 +9,7 @@
"require": [
"ts-node/register"
],
"timeout": "10m"
"timeout": "30m"
},
"scripts": {
"test": "ava"
40 changes: 37 additions & 3 deletions tests/run-all.sh
@@ -6,7 +6,7 @@ E2E_REGEX=${E2E_TEST_REGEX:-*.test.ts}
DIR=$(dirname "$0")
cd $DIR

concurrent_tests_limit=5
concurrent_tests_limit=6
pids=()
lookup=()
failed_count=0
@@ -23,18 +23,52 @@ function run_tests {
for test_case in $(find scalers -name "$E2E_REGEX" | shuf)
do
counter=$((counter+1))
./node_modules/.bin/ava $test_case > "${test_case}.log" 2>&1 &
./node_modules/.bin/ava $test_case > "${test_case}.1.log" 2>&1 &
pid=$!
echo "Running $test_case with pid: $pid"
pids+=($pid)
lookup[$pid]=$test_case
# limit concurrent runs
if [[ "$counter" -gt "$concurrent_tests_limit" ]]; then
if [[ "$counter" -ge "$concurrent_tests_limit" ]]; then
wait_for_jobs
counter=0
pids=()
fi
done

wait_for_jobs

# Retry failing tests
if [ ${#failed_lookup[@]} -ne 0 ]; then

printf "\n\n##############################################\n"
printf "##############################################\n\n"
printf "FINISHED FIRST EXECUTION, RETRYING FAILING TESTS"
printf "\n\n##############################################\n"
printf "##############################################\n\n"

retry_lookup=("${failed_lookup[@]}")
counter=0
pids=()
failed_count=0
failed_lookup=()

for test_case in "${retry_lookup[@]}"
do
counter=$((counter+1))
./node_modules/.bin/ava $test_case > "${test_case}.2.log" 2>&1 &
pid=$!
echo "Rerunning $test_case with pid: $pid"
pids+=($pid)
lookup[$pid]=$test_case
# limit concurrent runs
if [[ "$counter" -ge "$concurrent_tests_limit" ]]; then
wait_for_jobs
counter=0
pids=()
fi
done
fi
}

function mark_failed {
6 changes: 0 additions & 6 deletions tests/scalers/activemq.test.ts
@@ -135,12 +135,6 @@ spec:
name: mqtt
protocol: TCP
resources:
requests:
memory: 500Mi
cpu: 200m
limits:
memory: 1000Mi
cpu: 400m
volumeMounts:
- name: activemq-config
mountPath: /opt/apache-activemq-5.16.3/webapps/api/WEB-INF/classes/jolokia-access.xml
12 changes: 6 additions & 6 deletions tests/scalers/argo-rollouts.test.ts
@@ -74,31 +74,31 @@ test.serial(`Rollouts should scale to 5 (the max) with HTTP Requests exceeding i

// keda based rollout should start scaling up with http requests issued
let replicaCount = '0'
for (let i = 0; i < 60 && replicaCount !== '5'; i++) {
t.log(`Waited ${5 * i} seconds for prometheus-based rollout to scale up`)
for (let i = 0; i < 120 && replicaCount !== '5'; i++) {
t.log(`Waited ${10 * i} seconds for prometheus-based rollout to scale up`)
const jobLogs = sh.exec(`kubectl logs -l job-name=generate-requests -n ${testNamespace}`).stdout
t.log(`Logs from the generate requests: ${jobLogs}`)

replicaCount = sh.exec(
`kubectl get rollouts.argoproj.io/keda-test-app --namespace ${testNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '5') {
await sleep(5000)
await sleep(10000)
}
}

t.is('5', replicaCount, 'Replica count should be maxed at 5')

for (let i = 0; i < 50 && replicaCount !== '0'; i++) {
for (let i = 0; i < 90 && replicaCount !== '0'; i++) {
replicaCount = sh.exec(
`kubectl get rollouts.argoproj.io/keda-test-app --namespace ${testNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '0') {
await sleep(5000)
await sleep(10000)
}
}

t.is('0', replicaCount, 'Replica count should be 0 after 3 minutes')
t.is('0', replicaCount, 'Replica count should be 0 after 15 minutes')
})

test.after.always.cb('clean up argo-rollouts testing deployment', t => {
3 changes: 0 additions & 3 deletions tests/scalers/artemis-helpers.ts
@@ -159,9 +159,6 @@ spec:
image: docker.io/vromero/activemq-artemis:2.6.2
imagePullPolicy:
resources:
requests:
cpu: 100m
memory: 256Mi
env:
- name: ARTEMIS_PASSWORD
valueFrom:
59 changes: 32 additions & 27 deletions tests/scalers/azure-pipelines.test.ts
@@ -15,6 +15,8 @@ const projectName = process.env['AZURE_DEVOPS_PROJECT']
const buildDefinitionID = process.env['AZURE_DEVOPS_BUILD_DEFINITON_ID']
const poolName = process.env['AZURE_DEVOPS_POOL_NAME']

let poolID: number

test.before(async t => {
if (!organizationURL || !personalAccessToken || !projectName || !buildDefinitionID || !poolName) {
t.fail('AZURE_DEVOPS_ORGANIZATION_URL, AZURE_DEVOPS_PAT, AZURE_DEVOPS_PROJECT, AZURE_DEVOPS_BUILD_DEFINITON_ID and AZURE_DEVOPS_POOL_NAME environment variables are required for azure pipelines tests')
@@ -25,7 +27,7 @@ test.before(async t => {

let taskAgent: ta.ITaskAgentApiBase = await connection.getTaskAgentApi();
let agentPool: ti.TaskAgentPool[] = await taskAgent.getAgentPools(poolName)
let poolID: number = agentPool[0].id
poolID = agentPool[0].id

if(!poolID) {
t.fail("failed to convert poolName to poolID")
@@ -40,38 +42,43 @@ test.before(async t => {
.replace('{{AZP_URL}}', organizationURL))
sh.exec(`kubectl create namespace ${defaultNamespace}`)
t.is(0, sh.exec(`kubectl apply -f ${deployFile.name} --namespace ${defaultNamespace}`).code, 'creating a deployment should work.')
})

test.serial('Deployment should have 1 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should start out as 1')
})


test.serial('Deployment should have 0 replicas after scale', async t => {
// wait for the first agent to be registered in the agent pool
await sleep(20 * 1000)

const scaledObjectFile = tmp.fileSync()
fs.writeFileSync(scaledObjectFile.name, poolIdScaledObject
.replace('{{AZP_POOL_ID}}', poolID.toString()))
t.is(0, sh.exec(`kubectl apply -f ${scaledObjectFile.name} --namespace ${defaultNamespace}`).code, 'creating ScaledObject with poolId should work.')
})

test.serial('Deployment should have 1 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should start out as 1')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 if no pending jobs')
})

test.serial('PoolID: Deployment should scale to 3 replicas after queueing 3 jobs', async t => {

test.serial('PoolID: Deployment should scale to 1 replica after queueing job', async t => {
let authHandler = azdev.getPersonalAccessTokenHandler(personalAccessToken);
let connection = new azdev.WebApi(organizationURL, authHandler);
let build: ba.IBuildApi = await connection.getBuildApi();
var definitionID = parseInt(buildDefinitionID)

// wait for the first agent to be registered in the agent pool
await sleep(20 * 1000)
await build.queueBuild(null, projectName, null, null, null, definitionID)

for(let i = 0; i < 3; i++) {
await build.queueBuild(null, projectName, null, null, null, definitionID)
}

t.true(await waitForDeploymentReplicaCount(3, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 3 after starting 3 jobs')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 1 after starting a job')
})

test.serial('PoolID: Deployment should scale to 1 replica after finishing 3 jobs', async t => {
test.serial('PoolID: Deployment should scale to 0 replicas after finishing job', async t => {
// wait 10 minutes for the jobs to finish and scale down
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 60, 10000), 'replica count should be 1 after finishing 3 jobs')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 10000), 'replica count should be 0 after finishing')
})

test.serial('PoolName: Deployment should scale to 3 replicas after queueing 3 jobs', async t => {
test.serial('PoolName: Deployment should scale to 1 replica after queueing job', async t => {
const poolNameScaledObjectFile = tmp.fileSync()
fs.writeFileSync(poolNameScaledObjectFile.name, poolNameScaledObject
.replace('{{AZP_POOL}}', poolName))
@@ -82,16 +89,14 @@ test.serial('PoolName: Deployment should scale to 3 replicas after queueing 3 jo
let build: ba.IBuildApi = await connection.getBuildApi();
var definitionID = parseInt(buildDefinitionID)

for(let i = 0; i < 3; i++) {
await build.queueBuild(null, projectName, null, null, null, definitionID)
}
await build.queueBuild(null, projectName, null, null, null, definitionID)

t.true(await waitForDeploymentReplicaCount(3, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 3 after starting 3 jobs')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 30, 5000), 'replica count should be 1 after starting a job')
})

test.serial('PoolName: should scale to 1 replica after finishing 3 jobs', async t => {
test.serial('PoolName: should scale to 0 replicas after finishing job', async t => {
// wait 10 minutes for the jobs to finish and scale down
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', defaultNamespace, 60, 10000), 'replica count should be 1 after finishing 3 jobs')
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 10000), 'replica count should be 0 after finishing')
})

test.after.always('clean up azure-pipelines deployment', t => {
@@ -157,9 +162,9 @@ metadata:
spec:
scaleTargetRef:
name: test-deployment
minReplicaCount: 1
maxReplicaCount: 3
pollingInterval: 50
minReplicaCount: 0
maxReplicaCount: 1
pollingInterval: 30
cooldownPeriod: 60
advanced:
horizontalPodAutoscalerConfig:
@@ -179,9 +184,9 @@ metadata:
spec:
scaleTargetRef:
name: test-deployment
minReplicaCount: 1
maxReplicaCount: 3
pollingInterval: 50
minReplicaCount: 0
maxReplicaCount: 1
pollingInterval: 30
cooldownPeriod: 60
advanced:
horizontalPodAutoscalerConfig:
48 changes: 20 additions & 28 deletions tests/scalers/azure-queue-restore-original-replicas.test.ts
@@ -1,16 +1,23 @@
import * as azure from 'azure-storage'
import * as fs from 'fs'
import * as sh from 'shelljs'
import * as tmp from 'tmp'
import test from 'ava'
import {waitForDeploymentReplicaCount} from "./helpers";

const defaultNamespace = 'azure-queue-restore-original-replicas-test'
const queueName = 'queue-name-restore'
const connectionString = process.env['TEST_STORAGE_CONNECTION_STRING']

test.before(t => {
if (!connectionString) {
t.fail('TEST_STORAGE_CONNECTION_STRING environment variable is required for queue tests')
}

const queueSvc = azure.createQueueService(connectionString)
queueSvc.messageEncoder = new azure.QueueMessageEncoder.TextBase64QueueMessageEncoder()
queueSvc.createQueueIfNotExists(queueName, _ => {})

sh.config.silent = true
const base64ConStr = Buffer.from(connectionString).toString('base64')
const tmpFile = tmp.fileSync()
@@ -23,11 +30,8 @@ test.before(t => {
)
})

test.serial('Deployment should have 2 replicas on start', t => {
const replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
t.is(replicaCount, '2', 'replica count should start out as 2')
test.serial('Deployment should have 2 replicas on start', async t => {
t.true(await waitForDeploymentReplicaCount(2, 'test-deployment', defaultNamespace, 15, 1000), 'replica count should be 2 after 15 seconds')
})

test.serial('Creating ScaledObject should work', t => {
@@ -44,34 +48,16 @@ test.serial('Creating ScaledObject should work', t => {

test.serial(
'Deployment should scale to 0 and then shold be back to 2 after deletion of ScaledObject',
t => {
let replicaCount = '100'
for (let i = 0; i < 50 && replicaCount !== '0'; i++) {
replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '0') {
sh.exec('sleep 5s')
}
}
t.is('0', replicaCount, 'Replica count should be 0')

async t => {
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 after 2 minutes')

t.is(
0,
sh.exec(`kubectl delete scaledobject.keda.sh/test-scaledobject --namespace ${defaultNamespace}`).code,
'deletion of ScaledObject should work.'
)

for (let i = 0; i < 50 && replicaCount !== '2'; i++) {
replicaCount = sh.exec(
`kubectl get deployment.apps/test-deployment --namespace ${defaultNamespace} -o jsonpath="{.spec.replicas}"`
).stdout
if (replicaCount !== '2') {
sh.exec('sleep 5s')
}
}
t.is('2', replicaCount, 'Replica count should be back at orignal 2')
t.true(await waitForDeploymentReplicaCount(2, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 2 after 2 minutes')
}
)

@@ -86,7 +72,13 @@ test.after.always.cb('clean up azure-queue deployment', t => {
sh.exec(`kubectl delete ${resource} --namespace ${defaultNamespace}`)
}
sh.exec(`kubectl delete namespace ${defaultNamespace}`)
t.end()

// delete test queue
const queueSvc = azure.createQueueService(connectionString)
queueSvc.deleteQueueIfExists(queueName, err => {
t.falsy(err, 'should delete test queue successfully')
t.end()
})
})

const deployYaml = `apiVersion: v1
@@ -145,5 +137,5 @@ spec:
triggers:
- type: azure-queue
metadata:
queueName: queue-name
queueName: ${queueName}
connectionFromEnv: AzureWebJobsStorage`
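
Note: several of the rewritten tests above swap hand-rolled kubectl polling loops for the sleep and waitForDeploymentReplicaCount helpers imported from ./helpers. The real implementation lives in tests/scalers/helpers.ts and is not part of this diff; the sketch below only illustrates what such a poller can look like, with the (target, name, namespace, iterations, interval) signature inferred from the call sites above. The actual helper may differ.

import * as sh from 'shelljs'

// Sketch only: the real helpers.ts may differ in names and details.
export function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms))
}

// Poll kubectl until the deployment reports the desired replica count,
// checking up to `iterations` times with `interval` milliseconds between checks.
export async function waitForDeploymentReplicaCount(
  target: number,
  name: string,
  namespace: string,
  iterations = 10,
  interval = 3000
): Promise<boolean> {
  for (let i = 0; i < iterations; i++) {
    const replicaCount = sh.exec(
      `kubectl get deployment.apps/${name} --namespace ${namespace} -o jsonpath="{.spec.replicas}"`
    ).stdout
    if (replicaCount === target.toString()) {
      return true
    }
    await sleep(interval)
  }
  return false
}

Returning a boolean lets each test keep the assertion and its message together, as in t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', defaultNamespace, 120, 1000), 'replica count should be 0 after 2 minutes').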
7 changes: 5 additions & 2 deletions tests/scalers/azure-queue-trigger-auth.test.ts
@@ -7,7 +7,7 @@ import test from 'ava'
import {waitForDeploymentReplicaCount} from "./helpers";

const testNamespace = 'azure-queue-auth-test'
const queueName = 'queue-name'
const queueName = 'queue-name-trigger'
const connectionString = process.env['TEST_STORAGE_CONNECTION_STRING']

test.before(async t => {
@@ -44,7 +44,10 @@ test.serial(
)

// Scaling out when messages available
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', testNamespace, 60, 1000), 'replica count should be 3 after 1 minute')
t.true(await waitForDeploymentReplicaCount(1, 'test-deployment', testNamespace, 60, 1000), 'replica count should be 1 after 1 minute')

queueSvc.clearMessages(queueName, _ => {})

// Scaling in when no available messages
t.true(await waitForDeploymentReplicaCount(0, 'test-deployment', testNamespace, 300, 1000), 'replica count should be 0 after 5 minute')
}
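
Note: the azure-storage queue operations used in these tests (createQueueIfNotExists, clearMessages, deleteQueueIfExists) are callback-based, which is why the cleanup hook above has to call t.end() inside the deleteQueueIfExists callback. Below is a minimal promise-wrapper sketch, assuming the legacy azure-storage SDK's error-first callbacks; the resetQueue helper name is hypothetical and not part of this commit.

import * as azure from 'azure-storage'

// Hypothetical helper, not part of this commit: adapts the error-first
// callbacks of the legacy azure-storage QueueService to async/await.
function queueOp(fn: (cb: (err: Error | null) => void) => void): Promise<void> {
  return new Promise((resolve, reject) => fn(err => (err ? reject(err) : resolve())))
}

// Ensure a test queue exists and starts empty, so leftover messages from a
// previous run cannot keep the deployment scaled out.
async function resetQueue(connectionString: string, queueName: string): Promise<void> {
  const queueSvc = azure.createQueueService(connectionString)
  queueSvc.messageEncoder = new azure.QueueMessageEncoder.TextBase64QueueMessageEncoder()
  await queueOp(cb => queueSvc.createQueueIfNotExists(queueName, cb))
  await queueOp(cb => queueSvc.clearMessages(queueName, cb))
}

Using distinct queue names per test file (queue-name-restore, queue-name-trigger) plus this kind of setup and teardown keeps parallel test runs from interfering with each other through a shared queue.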