Skip to content
This repository was archived by the owner on Sep 17, 2024. It is now read-only.
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 73 additions & 8 deletions .ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

@Library('apm@current') _

import groovy.transform.Field

/**
Store the worker status so that, if the CI worker behaves wrongly, the stage can be rerun.
*/
@Field def workersStatus = [:]

pipeline {
agent { label 'ubuntu-20.04 && immutable && docker' }
environment {
Expand Down Expand Up @@ -419,13 +426,15 @@ def checkTestSuite(Map parallelTasks = [:], Map item = [:]) {
def platform = rawPlatform.trim()
log(level: 'INFO', text: "Adding ${suite}:${platform}:${tags} test suite to the build execution")
def machineInfo = getMachineInfo(platform)
parallelTasks["${suite}_${platform}_${tags}"] = generateFunctionalTestStep(name: "${name}",
platform: platform,
provider: scenarioProvider,
suite: "${suite}",
tags: "${tags}",
pullRequestFilter: "${pullRequestFilter}",
machine: machineInfo)
def stageName = "${suite}_${platform}_${tags}"
parallelTasks["${stageName}"] = generateFunctionalTestStep(name: "${name}",
platform: platform,
provider: scenarioProvider,
suite: "${suite}",
tags: "${tags}",
pullRequestFilter: "${pullRequestFilter}",
machine: machineInfo,
stageName: stageName)
}
}
}
Expand Down Expand Up @@ -486,6 +495,7 @@ def generateFunctionalTestStep(Map args = [:]){
def tags = args.get('tags')
def pullRequestFilter = args.get('pullRequestFilter')?.trim() ? args.get('pullRequestFilter') : ''
def machine = args.get('machine')
def stageName = args.get('stageName')

// TODO: Is this still relevant?
if (isPR() || isUpstreamTrigger(filter: 'PR-')) {
Expand Down Expand Up @@ -528,7 +538,9 @@ def generateFunctionalTestStep(Map args = [:]){
envContext.add("NODE_USER=${machine.username}")

return {
withNode(labels: 'ubuntu-20.04 && gobld/machineType:e2-small', forceWorkspace: true, forceWorker: true){
// Set the worker as flaky for the time being, this will be changed in the finally closure.
setFlakyWorker(stageName)
retryWithNode(labels: 'ubuntu-20.04 && gobld/machineType:e2-small', forceWorkspace: true, forceWorker: true, stageName: stageName){
try {
deleteDir()
dir("${env.REAL_BASE_DIR}") {
Expand Down Expand Up @@ -569,6 +581,12 @@ def generateFunctionalTestStep(Map args = [:]){
} finally {
withEnv(envContext) {
dir("${env.REAL_BASE_DIR}") {
// If it reaches this point then the CI worker is most likely behaving correctly
// there is still a chance things might fail afterwards, but this is just the finally
// section so we could say we are good to go.
// It runs within the dir block so that, if the worker is gone, an error is thrown
// because the directory cannot be accessed on the missing worker.
unsetFlakyWorker(stageName)
def testRunnerIP = getNodeIp("node")
sh "mkdir -p outputs/${testRunnerIP} || true"
ciBuild() {
Expand Down Expand Up @@ -604,3 +622,50 @@ def generateFunctionalTestStep(Map args = [:]){
}
}
}

/**
  Run the given body within a worker allocated by withNode and, if it fails while the
  stage is still flagged as flaky, run it again in a new worker while retries remain.

  @param args arguments forwarded as-is to withNode; args.stageName is the key used to
              track the worker status and the number of attempts for the stage.
  @param body closure to run within the allocated worker.
*/
def retryWithNode(Map args = [:], Closure body) {
  try {
    // Count this attempt before allocating the worker, so a failure while
    // allocating the worker is counted as an attempt too.
    incrementRetries(args.stageName)
    withNode(args){
      body()
    }
  } catch (err) {
    log(level: 'WARN', text: "Stage '${args.stageName}' failed, let's analyse if it's a flaky CI worker.")
    // The stage is flagged as flaky until unsetFlakyWorker is called for it. If the flag is
    // still set here, the body did not get that far, so the worker is presumed to have
    // misbehaved and the stage is rerun in a new worker while retries are left.
    if (isFlakyWorker(args.stageName) && isRetryAvailable(args.stageName)) {
      log(level: 'INFO', text: "Rerun '${args.stageName}' in a new worker.")
      retryWithNode(args) {
        body()
      }
    } else {
      // Either a genuine failure or no retries left: fail the stage.
      error("Error '${err.toString()}'")
    }
  }
}

/**
  Whether the worker for the given stage is currently flagged as flaky.

  @param stageName key of the stage in workersStatus.
  @return true only when the stage has an entry whose status is falsy; false when the
          stage is unknown or no status has been recorded for it.
*/
def isFlakyWorker(stageName) {
  // The field is called workersStatus; the previous 'workerStatus' lookup was an
  // undefined name.
  def entry = workersStatus.get(stageName)
  if (entry) {
    // A missing status is treated as healthy (the former default of true). containsKey
    // is used because Groovy's Map.get(key, default) also stores the default in the map.
    return entry.containsKey('status') ? !entry.status : false
  }
  return false
}

/**
  Whether the given stage can still be rerun.

  @param stageName key of the stage in workersStatus.
  @param maxRetries maximum number of recorded attempts; defaults to 2, the former
                    hard-coded limit.
  @return true when the attempts recorded for the stage are below maxRetries; false when
          nothing has been recorded for the stage.
*/
def isRetryAvailable(stageName, maxRetries = 2) {
  // Use the stageName parameter and the workersStatus field: the previous lookup went
  // through 'workerStatus' and 'args', neither of which is defined in this method.
  def retries = workersStatus.get(stageName)?.retries
  // No recorded attempts is treated as "no retry available", which matches the former
  // default of 2 being compared against the limit of 2.
  if (retries == null) {
    return false
  }
  return retries < maxRetries
}

/**
  Increase by one the number of attempts recorded for the given stage.

  @param stageName key of the stage in workersStatus.
*/
def incrementRetries(stageName) {
  // The field is called workersStatus; 'workerStatus' was an undefined name.
  def entry = workersStatus.get(stageName)
  if (entry == null) {
    // First time this stage is seen: create its entry instead of dereferencing null.
    entry = [:]
    workersStatus[stageName] = entry
  }
  def current = entry.retries ?: 0
  entry.retries = current + 1
}

/**
  Flag the worker of the given stage as flaky (status = false).

  @param stageName key of the stage in workersStatus.
*/
def setFlakyWorker(stageName) {
  // workersStatus starts empty, so create the entry for the stage on first use
  // instead of dereferencing null.
  if (workersStatus.get(stageName) == null) {
    workersStatus[stageName] = [:]
  }
  // Set the status to failed for now; unsetFlakyWorker flips it back.
  workersStatus[stageName].status = false
}

/**
  Flag the worker of the given stage as healthy (status = true).

  @param stageName key of the stage in workersStatus.
*/
def unsetFlakyWorker(stageName) {
  // Create the entry for the stage if it does not exist yet instead of
  // dereferencing null.
  if (workersStatus.get(stageName) == null) {
    workersStatus[stageName] = [:]
  }
  // Set the status to healthy, so a later failure is not treated as a flaky worker.
  workersStatus[stageName].status = true
}