diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 50c7503f3eb..c65f2abd598 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -4,10 +4,12 @@ def CUSTOM_WORKSPACE = 'none' def HOMEgfs = 'none' def CI_CASES = '' def GH = 'none' +// Map of the machine names (MACHINE_ID) to the Jenkins Node names +def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaeac5: 'GaeaC5', gaeac6: 'Gaeac6-EMC'] // Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR. -def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea', gaeac6: 'Gaeac6-EMC'] -def custom_workspace = [hera: '/scratch1/NCEPDEV/global/glopara/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/global/role-global/GFS_CI_CD/HERCULES/CI', gaea: '/gpfs/f5/epic/proj-shared/global/CI', gaeac6: '/gpfs/f6/drsa-precip3/proj-shared/global/CI'] +def custom_workspace = [hera: '/scratch1/NCEPDEV/global/glopara/CI', orion: '/work2/noaa/global/role-global/GFS_CI_CD/ORION/CI', hercules: '/work2/noaa/global/role-global/GFS_CI_CD/HERCULES/CI', gaeac5: '/gpfs/f5/epic/proj-shared/global/CI', gaeac6: '/gpfs/f6/drsa-precip3/proj-shared/global/CI'] def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' + def STATUS = 'Passed' pipeline { @@ -22,7 +24,6 @@ pipeline { stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR // which is used to designate the Nodes in the Jenkins Controller by the agent label // Each Jenkins Node is connected to said machine via an JAVA agent via an ssh tunnel - // no op 2 stage('1. 
Get Machine') { agent { label 'built-in' } @@ -35,6 +36,7 @@ pipeline { } def run_nodes = [] + def machine_names = [] if (isSpawnedFromAnotherJob) { echo "machine being set to value passed to this spawned job" echo "passed machine: ${params.machine}" @@ -42,29 +44,33 @@ pipeline { } else { echo "This is parent job so getting list of nodes matching labels:" for (label in pullRequest.labels) { + echo "label in pullReqest: ${label}" if (label.matches("CI-(.*?)-Ready")) { + echo "Found Ready Label: ${label}" def machine_name = label.split('-')[1].toString().toLowerCase() jenkins.model.Jenkins.get().computers.each { c -> if (c.node.selfLabel.name == NodeName[machine_name]) { run_nodes.add(c.node.selfLabel.name) + machine_names.add(machine_name) // record machine name alongside node } } } } - // Spawning all the jobs on the nodes matching the labels + // Spawning jobs using both run_nodes and machine_names arrays if (run_nodes.size() > 1) { - run_nodes.init().each { node -> - def machine_name = node.split('-')[0].toLowerCase() + for (int i = 0; i < run_nodes.size() - 1; i++) { + def node = run_nodes[i] + def machine_name = machine_names[i] // use the corresponding machine name echo "Spawning job on node: ${node} with machine name: ${machine_name}" build job: "/global-workflow/EMC-Global-Pipeline/PR-${env.CHANGE_ID}", parameters: [ string(name: 'machine', value: machine_name), - string(name: 'Node', value: node) ], - wait: false + string(name: 'Node', value: node) + ], wait: false } - machine = run_nodes.last().split('-')[0].toLowerCase() + machine = machine_names[run_nodes.size() - 1] echo "Running parent job: ${machine}" } else { - machine = run_nodes[0].split('-')[0].toLowerCase() + machine = machine_names[0] echo "Running only the parent job: ${machine}" } } @@ -76,10 +82,11 @@ pipeline { agent { label NodeName[machine].toLowerCase() } steps { script { + // Capitalize the first letter of the machine name and use it for labels Machine = machine[0].toUpperCase() + 
machine.substring(1) echo "Getting Common Workspace for ${Machine}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { - properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'Gaea', 'GaeaC6-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) + properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC', 'GaeaC5', 'GaeaC6-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" HOMEgfs = "${CUSTOM_WORKSPACE}/global-workflow" @@ -184,18 +191,20 @@ pipeline { agent { label NodeName[machine].toLowerCase() } steps { script { + ws(HOMEgfs) { def parallelStages = CI_CASES.collectEntries { caseName -> ["${caseName}": { stage("Create ${caseName}") { script { env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" + def error_output = "" try { error_output = sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh ${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml """, returnStdout: true).trim() } catch (Exception error_create) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body '${caseName} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n```\n${error_output}```' """) error("Case ${caseName} failed to create experiment directory") } } @@ -271,8 +280,9 @@ pipeline { }] } parallel parallelStages + [failFast: true] - } - } + } + } + } } stage( '5. 
Finalize' ) { diff --git a/ci/platforms/config.orion b/ci/platforms/config.orion index 880546908e6..5188300d159 100644 --- a/ci/platforms/config.orion +++ b/ci/platforms/config.orion @@ -1,18 +1,25 @@ #!/usr/bin/bash -export GFS_CI_ROOT=/work2/noaa/stmp/GFS_CI_ROOT/ORION +# Main CI root directory +export GFS_CI_ROOT=/work2/noaa/global/${USER}/GFS_CI_CD/ORION +# ICSDIR root directory used on the create_experiment.py command line export ICSDIR_ROOT=/work/noaa/global/glopara/data/ICSDIR -# Jenkins directories -export JENKINS_AGENT_LAUNCH_DIR=/home/role-nems/GFS_CI_ROOT_JENKINS/AGENT_mterry -export JENKINS_WORK_DIR=/home/role-nems/GFS_CI_ROOT_JENKINS +# JENKINS launch directory for agent +export JENKINS_AGENT_LAUNCH_DIR=${GFS_CI_ROOT}/Jenkins/agent +# JENKINS internal working directories for CI jobs (not for user use) +export JENKINS_WORK_DIR=${GFS_CI_ROOT}/Jenkins/workspace +# NOTE: JENKINS custom_workspace directory where CI jobs are run +# /work2/noaa/global/role-global/GFS_CI_CD/ORION/CI +# is defined in the Jenkinsfile + +# CTest functional test directories for pre-staged input data +export STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR # CI BASH test directories export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI -# CTest functional test directories for pre stagged input data -export STAGED_TESTS_DIR=/work/noaa/stmp/GFS_CI_ROOT/ORION/STAGED_TESTS_DIR - -export HPC_ACCOUNT=nems +# HPC account which overrides the default account +export HPC_ACCOUNT=fv3-cpu export max_concurrent_cases=5 export max_concurrent_pr=4 diff --git a/ci/scripts/utils/launch_java_agent.sh b/ci/scripts/utils/launch_java_agent.sh index a4da13b227f..9d9bd130d1f 100755 --- a/ci/scripts/utils/launch_java_agent.sh +++ b/ci/scripts/utils/launch_java_agent.sh @@ -164,6 +164,13 @@ check_node_online() { lauch_agent () { echo "Launching Jenkins Agent on ${host} using internal workspace ${JENKINS_WORK_DIR}" + + # Clear the remoting cache + if [[ -d "${JENKINS_WORK_DIR}/remoting" ]]; then + echo 
"Clearing remoting cache in ${JENKINS_WORK_DIR}/remoting" + rm -rf "${JENKINS_WORK_DIR}/remoting" + fi + command="nohup ${JAVA} -jar agent.jar -jnlpUrl ${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp -secret @jenkins-secret-file -workDir ${JENKINS_WORK_DIR}" echo -e "Launching Jenkins Agent on ${host} with the command:\n${command}" >& "${LOG}" ${command} >> "${LOG}" 2>&1 & diff --git a/workflow/hosts/orion.yaml b/workflow/hosts/orion.yaml index f1ef2c8b944..adc9547e0a6 100644 --- a/workflow/hosts/orion.yaml +++ b/workflow/hosts/orion.yaml @@ -4,9 +4,9 @@ BASE_DATA: '/work/noaa/global/glopara/data' BASE_IC: '/work/noaa/global/glopara/data/ICSDIR' PACKAGEROOT: '/work/noaa/global/glopara/nwpara' COMINsyn: '/work/noaa/global/glopara/com/gfs/prod/syndat' -HOMEDIR: '/work/noaa/global/${USER}' -STMP: '/work/noaa/stmp/${USER}/ORION' -PTMP: '/work/noaa/stmp/${USER}/ORION' +HOMEDIR: '/work2/noaa/global/${USER}/ORION' +STMP: '/work2/noaa/global/${USER}/ORION/STMP' +PTMP: '/work2/noaa/global/${USER}/ORION/PTMP' NOSCRUB: $HOMEDIR SCHEDULER: slurm ACCOUNT: fv3-cpu