diff --git a/Jenkinsfile b/Jenkinsfile index df81456317cb..2b46121af7e6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,6 +2,14 @@ @Library('apm@current') _ + +import groovy.transform.Field + +/** + This is required to store the rerun stages to generate a file with the details. +*/ +@Field def rerunStages = [:] + pipeline { agent { label 'ubuntu-18 && immutable' } environment { @@ -184,6 +192,13 @@ VERSION=${env.VERSION}-SNAPSHOT""") archiveArtifacts artifacts: 'packaging.properties' } cleanup { + // Report rerun status + writeJSON(file: 'rerun.json', json: rerunStages) + archiveArtifacts(artifacts: 'rerun.json') + script { + log(level: 'INFO', text: 'Set description with the number of stage retries to help with the ES queries.') + currentBuild.description = "Stage retries: ${rerunStages?.size()}" + } // Required to enable the flaky test reporting with GitHub. Workspace exists since the post/always runs earlier dir("${BASE_DIR}"){ notifyBuildResult(prComment: true, @@ -288,7 +303,7 @@ def cloud(Map args = [:]) { } withCloudTestEnv() { try { - target(context: args.context, command: args.command, directory: args.directory, label: args.label, withModule: args.withModule, isMage: true, id: args.id) + runTargetWithRetry(context: args.context, command: args.command, directory: args.directory, label: args.label, withModule: args.withModule, isMage: true, id: args.id) } finally { terraformCleanup(name: args.directory, dir: args.directory) } @@ -541,6 +556,30 @@ def e2e(Map args = [:]) { } } +/** +* This method runs the given command with a retry in case of any failures. 
+*/
+def runTargetWithRetry(Map args = [:]) {
+  // It requires type -> https://github.com/jenkinsci/script-security-plugin/blob/2810139da6ecb5700eac4e46c580b8f26a3b1899/src/main/resources/org/jenkinsci/plugins/scriptsecurity/sandbox/whitelists/generic-whitelist#L1086
+  Map arguments = args
+  arguments['numberOfRetries'] = 3
+  def lastError = null // failure of the most recent attempt, rethrown if no attempt succeeds
+  for (int count = 1; count <= arguments['numberOfRetries']; count++) {
+    arguments['currentRetry'] = count
+    try {
+      log(level: 'INFO', text: "run '${arguments.context}' - ${count} out of ${arguments['numberOfRetries']}.")
+      target(arguments)
+      return // the attempt succeeded, no further retries are needed
+    } catch(e) {
+      log(level: 'WARN', text: "${arguments.context} failed - ${count} out of ${arguments['numberOfRetries']}, let's try again and discard any kind of flakiness.")
+      rerunStages["${arguments.context}".toString()] = arguments // plain String key: the map is serialised to JSON later
+      lastError = e
+    }
+  }
+  // Every attempt failed: propagate the last error instead of silently returning as if the target had passed.
+  throw lastError
+}
+
 /**
 * This method runs the given command supporting two kind of scenarios:
 * - make -C then the dir(location) is not required, aka by disaling isMage: false
@@ -555,10 +594,12 @@ def target(Map args = [:]) {
   def isE2E = args.e2e?.get('enabled', false)
   def isPackaging = args.get('package', false)
   def dockerArch = args.get('dockerArch', 'amd64')
+  def numberOfRetries = args.get('numberOfRetries', 1)
+  def currentRetry = args.get('currentRetry', 1)
   def enableRetry = args.get('enableRetry', false)
   withNode(labels: args.label, forceWorkspace: true){
     withGithubNotify(context: "${context}") {
-      withBeatsEnv(archive: true, withModule: withModule, directory: directory, id: args.id) {
+      withBeatsEnv(archive: true, withModule: withModule, directory: directory, id: args.id, numberOfRetries: numberOfRetries, currentRetry: currentRetry) {
         dumpVariables()
         // make commands use -C while mage commands require the dir(folder)
         // let's support this scenario with the location variable.
@@ -598,8 +639,10 @@ def withBeatsEnv(Map args = [:], Closure body) { def archive = args.get('archive', true) def withModule = args.get('withModule', false) def directory = args.get('directory', '') + def numberOfRetries = args.get('numberOfRetries', 1) + def currentRetry = args.get('currentRetry', 1) - def path, magefile, pythonEnv, testResults, artifacts, gox_flags, userProfile + def path, magefile, pythonEnv, testResults, gox_flags, userProfile if(isUnix()) { gox_flags = (isArm() && is64arm()) ? '-arch arm' : '-arch amd64' @@ -607,7 +650,6 @@ def withBeatsEnv(Map args = [:], Closure body) { magefile = "${WORKSPACE}/.magefile" pythonEnv = "${WORKSPACE}/python-env" testResults = '**/build/TEST*.xml' - artifacts = '**/build/TEST*.out' } else { // NOTE: to support Windows 7 32 bits the arch in the mingw and go context paths is required. def mingwArch = is32() ? '32' : '64' @@ -617,7 +659,6 @@ def withBeatsEnv(Map args = [:], Closure body) { path = "${env.WORKSPACE}\\bin;${chocoPath};${chocoPython3Path};C:\\tools\\mingw${mingwArch}\\bin;${env.PATH}" magefile = "${env.WORKSPACE}\\.magefile" testResults = "**\\build\\TEST*.xml" - artifacts = "**\\build\\TEST*.out" gox_flags = '-arch 386' } @@ -654,8 +695,12 @@ def withBeatsEnv(Map args = [:], Closure body) { // Go/Mage installation is not anymore configured with env variables and installed // with installTools but delegated to the parent closure withMageEnv. installTools(args) - // Skip to upload the generated files by default. - def upload = false + + // flag with the body status, failed by default. + // this will help to: + // a) upload the system build artifacts if the body failed. 
+ // b) if the last retry + def failed = true try { // Add more stability when dependencies are not accessible temporarily // See https://github.com/elastic/beats/issues/21609 @@ -665,13 +710,18 @@ def withBeatsEnv(Map args = [:], Closure body) { cmd(label: 'Download modules to local cache - retry', script: 'go mod download', returnStatus: true) } body() - } catch(err) { - // Upload the generated files ONLY if the step failed. This will avoid any overhead with Google Storage - upload = true - error("Error '${err.toString()}'") + // body didnt' fail so lets update the flag. + failed = false } finally { - if (archive) { - archiveTestOutput(testResults: testResults, artifacts: artifacts, id: args.id, upload: upload) + // Only if archive = true then report test result: + // a) if body() didn't fail. + // b) if no more retries to avoid test failures from a previous retry. + // + // no more retries => numberOfRetries == currentRetry + if (archive && (numberOfRetries == currentRetry || !failed)) { + archiveTestOutput(testResults: testResults, id: args.id, upload: failed) + } else { + log(level: 'WARN', text: "archiveTestOutput is disabled. Reason (archive: '${archive}', (numberOfRetries == currentRetry): ${numberOfRetries == currentRetry}, failed: ${failed})") } tearDown() }