From 8724c6ebc83ddcf742b0d1e075b88e4507ed1952 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 17 Mar 2025 17:58:31 -0400 Subject: [PATCH 01/45] first pass version of testing out matix on gitlab pipeline --- ci/.gitlab-ci.yml | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 ci/.gitlab-ci.yml diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml new file mode 100644 index 00000000000..65957dbeb1a --- /dev/null +++ b/ci/.gitlab-ci.yml @@ -0,0 +1,41 @@ +stages: + - build + - test + +variables: + SCHEDULE: "0 0 * * *" + WORKSPACE_DIRS: + orion: "/work2/noaa/global/role-global/GFS_CI_CD/ORION/GITLAB/CI" + hercules: "/work2/noaa/global/role-global/GFS_CI_CD/HERCULES/GITLAB/CI" + MACHINES: ["orion", "hercules"] + +build: + stage: build + script: + - sorc/build_compute.sh all + tags: + - ${MACHINE} + only: + - schedules + parallel: + matrix: + - MACHINE: ["orion", "hercules"] + strategy: + fetch: true + +test: + stage: test + script: + - cd ${WORKSPACE_DIRS[$MACHINE]} + - bash test.sh + tags: + - ${MACHINE} + only: + - schedules + parallel: + matrix: + - MACHINE: ["orion", "hercules"] + dependencies: + - build + strategy: + depend: true \ No newline at end of file From 91a30769570eff644b5a2645cb005330c398b921 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 17 Mar 2025 18:01:09 -0400 Subject: [PATCH 02/45] sharing the docker compose file for sandboxed GitLab --- ci/docker-compose.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 ci/docker-compose.yml diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml new file mode 100644 index 00000000000..ec6119b90eb --- /dev/null +++ b/ci/docker-compose.yml @@ -0,0 +1,20 @@ +services: + gitlab: + image: gitlab/gitlab-ce:17.6.4-ce.0 + container_name: gitlab + restart: always + hostname: 'localhost' + environment: + GITLAB_OMNIBUS_CONFIG: | + # Add any other gitlab.rb configuration here, each on its own line + external_url 
'http://localhost:8929' + ports: + - '8929:8929' + - '443:443' + - '2424:22' + volumes: + - '/home/tmcguinness/GITLAB/config:/etc/gitlab' + - '/home/tmcguinness/GITLAB/logs:/var/log/gitlab' + - '/home/tmcguinness/GITLAB/data:/var/opt/gitlab' + shm_size: '256m' + From cb4c11e5eba8b58ba880e1421a17d70478bb64e1 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 16:39:41 -0400 Subject: [PATCH 03/45] updated run-check to optionally run over all the cases in the RUNTESTS EXPDIR dir --- ci/.gitlab-ci.yml | 42 ++++++++--- ci/docker-compose.yml | 1 + ci/scripts/run-check_ci.sh | 145 +++++++++++++++++++++---------------- 3 files changed, 114 insertions(+), 74 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 65957dbeb1a..2a06245edc4 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -4,15 +4,15 @@ stages: variables: SCHEDULE: "0 0 * * *" - WORKSPACE_DIRS: - orion: "/work2/noaa/global/role-global/GFS_CI_CD/ORION/GITLAB/CI" - hercules: "/work2/noaa/global/role-global/GFS_CI_CD/HERCULES/GITLAB/CI" - MACHINES: ["orion", "hercules"] + GIT_CLONE_PATH: '$CI_BUILDS_DIR/$CI_JOB_ID/$CI_PROJECT_NAME' build: stage: build script: - - sorc/build_compute.sh all + - export CI_PROJECT_DIR=$GIT_CLONE_PATH + - export HASH=$(git rev-parse HEAD) # Get git hash from the clone + - echo $HASH > .githash # Save hash to file + - ci/scripts/utils/ci_utils_wrapper.sh build_compute tags: - ${MACHINE} only: @@ -22,12 +22,19 @@ build: - MACHINE: ["orion", "hercules"] strategy: fetch: true + artifacts: + paths: + - .githash # Pass git hash to next stage + expire_in: 1 hour -test: - stage: test +setup: + stage: create_experments script: - - cd ${WORKSPACE_DIRS[$MACHINE]} - - bash test.sh + - export CI_PROJECT_DIR=$GIT_CLONE_PATH + - export HASH=$(cat .githash) # Retrieve git hash from artifact + - RUNTESTS_DIR="${CI_BUILD_DIR}/${CI_JOB_ID}/RUNTESTS" + - mkdir -p ${RUNTESTS_DIR} + - workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} tags: - ${MACHINE} 
only: @@ -38,4 +45,19 @@ test: dependencies: - build strategy: - depend: true \ No newline at end of file + depend: true + +run_tests: + stage: run_tests + script: + - export CI_PROJECT_DIR=$GIT_CLONE_PATH + - echo "Running tests for machine ${MACHINE}" # replace with actual test commands + tags: + - ${MACHINE} + only: + - schedules + parallel: + matrix: + - MACHINE: ["orion", "hercules"] + dependencies: + - setup \ No newline at end of file diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index ec6119b90eb..f3f612c63c0 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -17,4 +17,5 @@ services: - '/home/tmcguinness/GITLAB/logs:/var/log/gitlab' - '/home/tmcguinness/GITLAB/data:/var/opt/gitlab' shm_size: '256m' + user: "${UID}:${GID}" diff --git a/ci/scripts/run-check_ci.sh b/ci/scripts/run-check_ci.sh index 5c49a21c4ba..87e4d9d475b 100755 --- a/ci/scripts/run-check_ci.sh +++ b/ci/scripts/run-check_ci.sh @@ -9,7 +9,14 @@ set -eu TEST_DIR=${1:-${TEST_DIR:-?}} # Location of the root of the testing directory pslot=${2:-${pslot:-?}} # Name of the experiment being tested by this script -SYSTEM_BUILD_DIR=${3:-"global-workflow"} # Name of the system build directory, default is "global-workflow +SYSTEM_BUILD_DIR=${3:-"global-workflow"} # Name of the system build directory, default is "global-workflow" +GET_PSLOTS=${4:-"false"} # Flag to get the list of pslots from the the directory pslot + +# Check for usage +if [[ $# -lt 2 || $# -gt 4 ]]; then + echo "Usage: $0 [SYSTEM_BUILD_DIR] [--get_pslot_list]" + exit 1 +fi # TEST_DIR contains 2 directories; # 1. HOMEgfs: clone of the global-workflow @@ -19,7 +26,7 @@ SYSTEM_BUILD_DIR=${3:-"global-workflow"} # Name of the system build directory, # ├── HOMEgfs # └── RUNTESTS # ├── COMROOT -# │   └── ${pslot} +# │ └── ${pslot} # └── EXPDIR # └── ${pslot} # Two system build directories created at build time gfs, and gdas @@ -32,80 +39,91 @@ run_check_logfile="${RUNTESTS}/ci-run_check.log" echo "Source modules." 
source "${HOMEgfs}/workflow/gw_setup.sh" -# cd into the experiment directory -echo "cd ${RUNTESTS}/EXPDIR/${pslot}" -cd "${RUNTESTS}/EXPDIR/${pslot}" || (echo "FATAL ERROR: Unable to cd into '${RUNTESTS}/EXPDIR/${pslot}', ABORT!"; exit 1) +if [[ "${GET_PSLOTS}" == "--get_pslot_list" ]]; then + pslot_list=$("${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh" get_pslot_list "${RUNTESTS}") +else + pslot_list=(${pslot}) +fi +echo "Experment being ran with rocotorun and rocotostat.py: ${pslot_list}" -# Name of the Rocoto XML and database files -xml="${pslot}.xml" -db="${pslot}.db" +for pslot in ${pslot_list[@]}; do -# Ensure the XML is present for the experiment -if [[ ! -f "${xml}" ]]; then - echo "FATAL ERROR: XML file ${xml} not found in '${pslot}', experiment ${pslot} failed, ABORT!" - exit 1 -fi + # cd into the experiment directory + echo "cd ${RUNTESTS}/EXPDIR/${pslot}" + cd "${RUNTESTS}/EXPDIR/${pslot}" || (echo "FATAL ERROR: Unable to cd into '${RUNTESTS}/EXPDIR/${pslot}', ABORT!"; exit 1) -# Launch experiment -echo "Launch experiment with Rocoto." -rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" -sleep 10 -if [[ ! -f "${db}" ]]; then - echo "FATAL ERROR: Rocoto database file ${db} not found, experiment ${pslot} failed, ABORT!" - exit 2 -fi + # Name of the Rocoto XML and database files + xml="${pslot}.xml" + db="${pslot}.db" -# Experiment launched -rc=99 -set +e -while true; do + # Ensure the XML is present for the experiment + if [[ ! -f "${xml}" ]]; then + echo "FATAL ERROR: XML file ${xml} not found in '${pslot}', experiment ${pslot} failed, ABORT!" + exit 1 + fi - echo "Run rocotorun." + # Launch experiment + echo "Launch experiment with Rocoto." rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" + sleep 10 + if [[ ! -f "${db}" ]]; then + echo "FATAL ERROR: Rocoto database file ${db} not found, experiment ${pslot} failed, ABORT!" 
+ exit 2 + fi - # Wait before running rocotostat - sleep 60 + # Experiment launched + rc=99 + set +e + while true; do - # Get job statistics - echo "Gather Rocoto statistics" - # shellcheck disable=SC2312 # We want to use the exit code of the command - full_state=$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}" -v) - error_stat=$? + echo "Run rocotorun." + rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" - for state in CYCLES_TOTAL CYCLES_DONE SUCCEEDED FAIL DEAD; do - declare "${state}"="$(echo "${full_state}" | grep "${state}" | cut -d: -f2)" || true - done - ROCOTO_STATE=$(echo "${full_state}" | tail -1) || exit 1 + # Wait before running rocotostat + sleep 60 + + # Get job statistics + echo "Gather Rocoto statistics" + # shellcheck disable=SC2312 # We want to use the exit code of the command + full_state=$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}" -v) + error_stat=$? - echo -e "(${pslot} on ${MACHINE_ID^})\n\tTotal Cycles: ${CYCLES_TOTAL}\n\tNumber Cycles done: ${CYCLES_DONE}\n\tState: ${ROCOTO_STATE}" + for state in CYCLES_TOTAL CYCLES_DONE SUCCEEDED FAIL DEAD; do + declare "${state}"="$(echo "${full_state}" | grep "${state}" | cut -d: -f2)" || true + done + ROCOTO_STATE=$(echo "${full_state}" | tail -1) || exit 1 - if [[ ${error_stat} -ne 0 ]]; then - { - echo "Experiment ${pslot} Terminated with ${FAIL} tasks failed and ${DEAD} dead at $(date)" || true - echo "Experiment ${pslot} Terminated: *${ROCOTO_STATE}*" - } | tee -a "${run_check_logfile}" - if [[ "${DEAD}" -ne 0 ]]; then - error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true + echo -e "(${pslot} on ${MACHINE_ID^})\n\tTotal Cycles: ${CYCLES_TOTAL}\n\tNumber Cycles done: ${CYCLES_DONE}\n\tState: ${ROCOTO_STATE}" + + if [[ ${error_stat} -ne 0 ]]; then { - echo "Error logs:" - echo "${error_logs}" - } | tee -a 
"${run_check_logfile}" - rm -f "${RUNTESTS}/${pslot}_error.logs" - for log in ${error_logs}; do - echo "RUNTESTS${log#*RUNTESTS}" >> "${RUNTESTS}/${pslot}_error.logs" - done - fi - rc=1 - break - fi + echo "Experiment ${pslot} Terminated with ${FAIL} tasks failed and ${DEAD} dead at $(date)" || true + echo "Experiment ${pslot} Terminated: *${ROCOTO_STATE}*" + } | tee -a "${run_check_logfile}" + if [[ "${DEAD}" -ne 0 ]]; then + error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true + { + echo "Error logs:" + echo "${error_logs}" + } | tee -a "${run_check_logfile}" + rm -f "${RUNTESTS}/${pslot}_error.logs" + for log in ${error_logs}; do + echo "RUNTESTS${log#*RUNTESTS}" >> "${RUNTESTS}/${pslot}_error.logs" + done + fi + rc=1 + break + fi + + if [[ "${ROCOTO_STATE}" == "DONE" ]]; then + { + echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles: *SUCCESS* at $(date)" || true + } | tee -a "${run_check_logfile}" + rc=0 + break + fi - if [[ "${ROCOTO_STATE}" == "DONE" ]]; then - { - echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles: *SUCCESS* at $(date)" || true - } | tee -a "${run_check_logfile}" - rc=0 - break - fi + done # Wait before running rocotorun again sleep 300 @@ -113,4 +131,3 @@ while true; do done exit "${rc}" - From a774538a9a09f15ecc8b506d2748f237752b4824 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 16:43:16 -0400 Subject: [PATCH 04/45] added switch to run-check_ci to so all available pslots --- ci/.gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 2a06245edc4..08069f01ad3 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -51,7 +51,8 @@ run_tests: stage: run_tests script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - - echo "Running tests for machine ${MACHINE}" # replace with actual test commands + - source 
${HOMEgfs}/workflow/gw_setup.sh + - ci/scripts/run-check_ci.sh ${CI_PROJECT_DIR} ${pslot} 'global-workflow' --get_pslot_list tags: - ${MACHINE} only: From 44277b14bc41fa533bc37ee3892e318f482086da Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 18:47:03 -0400 Subject: [PATCH 05/45] some helper scrpts for starting and testing runners --- ci/config.toml | 24 ++++++++++++++++++++++++ ci/gitlab_runner.sh | 31 +++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 ci/config.toml create mode 100644 ci/gitlab_runner.sh diff --git a/ci/config.toml b/ci/config.toml new file mode 100644 index 00000000000..705c1cba1f9 --- /dev/null +++ b/ci/config.toml @@ -0,0 +1,24 @@ +concurrent = 1 +check_interval = 0 +shutdown_timeout = 0 + +builds_dir = "/home/tmcguinness/GITLAB/RUNNER/CI" + +[session_server] + session_timeout = 1800 + +[[runners]] + name = "Orion-login-1.HPC.MsState.Edu" + url = "http://localhost:8929" + id = 1 + token = "glrt-t3_ZTdxY19NwELUjMTg8Kdn" + token_obtained_at = 2025-03-18T21:32:07Z + token_expires_at = 0001-01-01T00:00:00Z + executor = "shell" + [runners.cache] + MaxUploadedArchiveSize = 0 + [runners.cache.s3] + [runners.cache.gcs] + [runners.cache.azure] + [runners.custom_build_dir] + enabled = true diff --git a/ci/gitlab_runner.sh b/ci/gitlab_runner.sh new file mode 100644 index 00000000000..63f5c8db287 --- /dev/null +++ b/ci/gitlab_runner.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +TOKEN=glrt-t3_ZTdxY19NwELUjMTg8Kdn + +if [[ ! -f gitlab-runner ]]; then + curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 + sudo chmod +x ./gitlab-runner +fi + +# Check if the GitLab Runner user already exists +if id "gitlab-runner" &>/dev/null; then + echo "User gitlab-runner already exists." 
+else + sudo useradd --comment 'GitLab Runner' --create-home gitlab-runner --shell /bin/bash +fi + +if [[ $1 == "install_service" ]]; then + sudo gitlab-runner install --user=gitlab-runner --working-directory=/home/gitlab-runner + gitlab-runner start +fi + +if [[ $1 == "register" ]]; then + gitlab-runner register --url http://localhost:8929 --token ${TOKEN} \ + --executor shell \ + --description "gitlab-runner" \ + --name "orion" \ + --tag-list "orion" \ + --run-untagged="true" \ + --locked="false" \ + --access-level="not_protected" +fi From 0d9f7817c1ece7224a2740d3bad7835ef9b3254a Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 18:47:38 -0400 Subject: [PATCH 06/45] some helper scrpts for starting and testing runners 2 --- ci/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/config.toml b/ci/config.toml index 705c1cba1f9..2e8246357ad 100644 --- a/ci/config.toml +++ b/ci/config.toml @@ -11,7 +11,7 @@ builds_dir = "/home/tmcguinness/GITLAB/RUNNER/CI" name = "Orion-login-1.HPC.MsState.Edu" url = "http://localhost:8929" id = 1 - token = "glrt-t3_ZTdxY19NwELUjMTg8Kdn" + token = "token_redacted" token_obtained_at = 2025-03-18T21:32:07Z token_expires_at = 0001-01-01T00:00:00Z executor = "shell" From a02c6064709502985dfdb18057ef8614a8276b14 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 19:10:48 -0400 Subject: [PATCH 07/45] had to update args to gitlab-runner register --- ci/gitlab_runner.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ci/gitlab_runner.sh b/ci/gitlab_runner.sh index 63f5c8db287..0091398ad34 100644 --- a/ci/gitlab_runner.sh +++ b/ci/gitlab_runner.sh @@ -20,12 +20,5 @@ if [[ $1 == "install_service" ]]; then fi if [[ $1 == "register" ]]; then - gitlab-runner register --url http://localhost:8929 --token ${TOKEN} \ - --executor shell \ - --description "gitlab-runner" \ - --name "orion" \ - --tag-list "orion" \ - --run-untagged="true" \ - --locked="false" \ 
- --access-level="not_protected" + gitlab-runner register -n -t ${TOKEN} --url http://localhost:8929 --executor shell fi From 8b12ecdfe99ef676ed71d49f6f5d358d4f01fb98 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 18 Mar 2025 20:34:36 -0400 Subject: [PATCH 08/45] added correct stage list --- ci/.gitlab-ci.yml | 49 ++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 08069f01ad3..e198444c087 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -1,33 +1,38 @@ stages: - build - - test - + - create_experments + - run_tests + variables: - SCHEDULE: "0 0 * * *" +# SCHEDULE: "0 0 * * *" GIT_CLONE_PATH: '$CI_BUILDS_DIR/$CI_JOB_ID/$CI_PROJECT_NAME' build: + variables: + GIT_STRATEGY: clone + GIT_SUBMODULE_STRATEGY: recursive + stage: build script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(git rev-parse HEAD) # Get git hash from the clone - echo $HASH > .githash # Save hash to file - ci/scripts/utils/ci_utils_wrapper.sh build_compute - tags: - - ${MACHINE} - only: - - schedules parallel: matrix: - - MACHINE: ["orion", "hercules"] - strategy: - fetch: true + - MACHINE: ["orion"] + tags: + - ${MACHINE} + #only: + # - schedules artifacts: paths: - .githash # Pass git hash to next stage expire_in: 1 hour setup: + variables: + GIT_STRATEGY: fetch stage: create_experments script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH @@ -35,30 +40,30 @@ setup: - RUNTESTS_DIR="${CI_BUILD_DIR}/${CI_JOB_ID}/RUNTESTS" - mkdir -p ${RUNTESTS_DIR} - workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} - tags: - - ${MACHINE} - only: - - schedules parallel: matrix: - - MACHINE: ["orion", "hercules"] + - MACHINE: ["orion"] + tags: + - ${MACHINE} + #only: + # - schedules dependencies: - build - strategy: - depend: true run_tests: + variables: + GIT_STRATEGY: fetch stage: run_tests script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - source 
${HOMEgfs}/workflow/gw_setup.sh - ci/scripts/run-check_ci.sh ${CI_PROJECT_DIR} ${pslot} 'global-workflow' --get_pslot_list - tags: - - ${MACHINE} - only: - - schedules parallel: matrix: - - MACHINE: ["orion", "hercules"] + - MACHINE: ["orion"] + tags: + - ${MACHINE} + #only: + # - schedules dependencies: - setup \ No newline at end of file From cb11c593006a617d5d75f64338c2d79f27732e62 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 19 Mar 2025 14:22:06 -0400 Subject: [PATCH 09/45] making progress with GaeaC6 connection --- ci/.gitlab-ci.yml | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index e198444c087..96b560ddcc2 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -4,7 +4,6 @@ stages: - run_tests variables: -# SCHEDULE: "0 0 * * *" GIT_CLONE_PATH: '$CI_BUILDS_DIR/$CI_JOB_ID/$CI_PROJECT_NAME' build: @@ -16,15 +15,15 @@ build: script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(git rev-parse HEAD) # Get git hash from the clone - - echo $HASH > .githash # Save hash to file - - ci/scripts/utils/ci_utils_wrapper.sh build_compute + - echo $HASH > .githash # Save hash to file + #- ci/scripts/utils/ci_utils_wrapper.sh build_compute + - sorc/build_all.sh all + - sorc/link_workflow.sh parallel: matrix: - - MACHINE: ["orion"] + - MACHINE: ["gaeac6"] tags: - ${MACHINE} - #only: - # - schedules artifacts: paths: - .githash # Pass git hash to next stage @@ -36,17 +35,15 @@ setup: stage: create_experments script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - - export HASH=$(cat .githash) # Retrieve git hash from artifact + - export HASH=$(cat .githash) # Retrieve git hash from artifact - RUNTESTS_DIR="${CI_BUILD_DIR}/${CI_JOB_ID}/RUNTESTS" - mkdir -p ${RUNTESTS_DIR} - workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} parallel: matrix: - - MACHINE: ["orion"] + - MACHINE: ["gaeac6"] tags: - ${MACHINE} - #only: - # - schedules dependencies: - build @@ -60,10 
+57,8 @@ run_tests: - ci/scripts/run-check_ci.sh ${CI_PROJECT_DIR} ${pslot} 'global-workflow' --get_pslot_list parallel: matrix: - - MACHINE: ["orion"] + - MACHINE: ["gaeac6"] tags: - ${MACHINE} - #only: - # - schedules dependencies: - setup \ No newline at end of file From 827a48679875794081adf7f38e71c80a114e81f8 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 19 Mar 2025 16:57:21 -0400 Subject: [PATCH 10/45] added all that is needed to download and register gitlab runner and made token a file or input --- ci/gitlab_runner.sh | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/ci/gitlab_runner.sh b/ci/gitlab_runner.sh index 0091398ad34..da69a8c8c71 100644 --- a/ci/gitlab_runner.sh +++ b/ci/gitlab_runner.sh @@ -1,24 +1,25 @@ #!/usr/bin/env bash -TOKEN=glrt-t3_ZTdxY19NwELUjMTg8Kdn - -if [[ ! -f gitlab-runner ]]; then - curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 - sudo chmod +x ./gitlab-runner +# Get token from 2nd arg, env, or file +GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} +if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then + if [[ -f gitlab_token ]]; then + source gitlab_token + fi fi +if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then + echo "ERROR: GITLAB_RUNNER_TOKEN not set" + exit 1 +fi -# Check if the GitLab Runner user already exists -if id "gitlab-runner" &>/dev/null; then - echo "User gitlab-runner already exists." -else - sudo useradd --comment 'GitLab Runner' --create-home gitlab-runner --shell /bin/bash -fi +URL=https://vlab.noaa.gov/gitlab-licensed +BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI -if [[ $1 == "install_service" ]]; then - sudo gitlab-runner install --user=gitlab-runner --working-directory=/home/gitlab-runner - gitlab-runner start +if [[ ! 
-f gitlab-runner ]]; then + curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 + chmod +x ./gitlab-runner fi if [[ $1 == "register" ]]; then - gitlab-runner register -n -t ${TOKEN} --url http://localhost:8929 --executor shell + ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${URL} --executor shell --builds-dir ${BUILDS_DIR} --shell bash --custom_build_dir-enabled true fi From d1df1874f522108681edf853ac556e53c7290eec Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 19 Mar 2025 17:12:09 -0400 Subject: [PATCH 11/45] added HOMEgfs to pipline --- ci/.gitlab-ci.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 96b560ddcc2..6d85f961397 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -5,6 +5,7 @@ stages: variables: GIT_CLONE_PATH: '$CI_BUILDS_DIR/$CI_JOB_ID/$CI_PROJECT_NAME' + HOMEgfs: $GIT_CLONE_PATH build: variables: @@ -16,8 +17,7 @@ build: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(git rev-parse HEAD) # Get git hash from the clone - echo $HASH > .githash # Save hash to file - #- ci/scripts/utils/ci_utils_wrapper.sh build_compute - - sorc/build_all.sh all + - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh parallel: matrix: @@ -36,9 +36,10 @@ setup: script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(cat .githash) # Retrieve git hash from artifact - - RUNTESTS_DIR="${CI_BUILD_DIR}/${CI_JOB_ID}/RUNTESTS" + - RUNTESTS_DIR="${CI_BUILDS_DIR}/${CI_JOB_ID}/RUNTESTS" - mkdir -p ${RUNTESTS_DIR} - - workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} + - ${HOMEgfs}/workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} + parallel: matrix: - MACHINE: ["gaeac6"] @@ -54,7 +55,7 @@ run_tests: script: - export CI_PROJECT_DIR=$GIT_CLONE_PATH - source ${HOMEgfs}/workflow/gw_setup.sh - - ci/scripts/run-check_ci.sh ${CI_PROJECT_DIR} 
${pslot} 'global-workflow' --get_pslot_list + - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOMEgfs} ${pslot} ${CI_PROJECT_NAME} --get_pslot_list parallel: matrix: - MACHINE: ["gaeac6"] From df3c04ece1092bc9427250964d4ed52cffd5acac Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 20 Mar 2025 14:50:02 -0400 Subject: [PATCH 12/45] using the same build dir between jobs --- ci/.gitlab-ci.yml | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 6d85f961397..5d73182c1b6 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -4,8 +4,20 @@ stages: - run_tests variables: - GIT_CLONE_PATH: '$CI_BUILDS_DIR/$CI_JOB_ID/$CI_PROJECT_NAME' + # Use a date-based unique identifier in the build path for nightly builds + # Format: YYYY-MM-DD (year-month-day) + BUILD_DATE: '$(date +%Y-%m-%d)' + # Fixed workspace path with date-based identifier to ensure each nightly build gets its own workspace + GIT_CLONE_PATH: '$CI_BUILDS_DIR/global-workflow-${BUILD_DATE}' HOMEgfs: $GIT_CLONE_PATH + # Add GIT_DEPTH to speed up cloning + GIT_DEPTH: 1 + +# Define a cache to preserve the workspace between jobs +cache: + key: ${CI_COMMIT_REF_SLUG}-${BUILD_DATE} + paths: + - $GIT_CLONE_PATH build: variables: @@ -14,6 +26,7 @@ build: stage: build script: + - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(git rev-parse HEAD) # Get git hash from the clone - echo $HASH > .githash # Save hash to file @@ -31,15 +44,16 @@ build: setup: variables: - GIT_STRATEGY: fetch + # Don't re-clone, just use the existing workspace + GIT_STRATEGY: none stage: create_experments script: + - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - export CI_PROJECT_DIR=$GIT_CLONE_PATH - export HASH=$(cat .githash) # Retrieve git hash from artifact - - RUNTESTS_DIR="${CI_BUILDS_DIR}/${CI_JOB_ID}/RUNTESTS" + - 
RUNTESTS_DIR="${CI_BUILDS_DIR}/RUNTESTS-${BUILD_DATE}" - mkdir -p ${RUNTESTS_DIR} - ${HOMEgfs}/workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} - parallel: matrix: - MACHINE: ["gaeac6"] @@ -50,9 +64,11 @@ setup: run_tests: variables: - GIT_STRATEGY: fetch + # Don't re-clone, just use the existing workspace + GIT_STRATEGY: none stage: run_tests script: + - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - export CI_PROJECT_DIR=$GIT_CLONE_PATH - source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOMEgfs} ${pslot} ${CI_PROJECT_NAME} --get_pslot_list From a1de1d29fd8d7c0df7a7542c6ea7c4d45e34dbc1 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 20 Mar 2025 15:01:11 -0400 Subject: [PATCH 13/45] fix workspace between jobs --- ci/error.sh | 13 +++++++++++++ sorc/verif-global.fd | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 ci/error.sh diff --git a/ci/error.sh b/ci/error.sh new file mode 100644 index 00000000000..63743b3da4f --- /dev/null +++ b/ci/error.sh @@ -0,0 +1,13 @@ +$ echo ${RUNTESTS_DIR} +/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/RUNTESTS +$ echo ${HASH} +78c89a111902f81bdd3c8d51c1ca05ebdf7c5af5 +$ mkdir -p ${RUNTESTS_DIR} +$ ${HOMEgfs}/workflow/generate_workflows.sh -G -t ${HASH} ${RUNTESTS_DIR} +The RUNTESTS directory /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/RUNTESTS already exists. +Would you like to remove it? 
+Running all GFS cases in /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/ci/cases/pr +Begin link_workflow.sh at Thu 20 Mar 2025 06:18:41 AM UTC +/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/sorc/link_workflow.sh: line 59: /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/sorc/gfs_utils.fd/ush/detect_machine.sh: No such file or directory +End link_workflow.sh at 06:18:41 with error code 1 (time elapsed: 00:00:00) +link_workflow.sh failed! \ No newline at end of file diff --git a/sorc/verif-global.fd b/sorc/verif-global.fd index ecb31f4575d..b2ee80cac79 160000 --- a/sorc/verif-global.fd +++ b/sorc/verif-global.fd @@ -1 +1 @@ -Subproject commit ecb31f4575d86e9ce50495682adb550992ccfc6a +Subproject commit b2ee80cac7921a3016fa5a857cc58acfccc4baea From 0d244a88fca64dd46bd7f84a0b5fa032fcacc1d6 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 20 Mar 2025 16:45:22 -0400 Subject: [PATCH 14/45] moved gitlab_runner start script into ci/scripts/utils and beefed it up to run from there per machine --- ci/gitlab_runner.sh | 25 --------------- ci/platforms/config.gaeac6 | 4 +++ ci/scripts/utils/gitlab_runner.sh | 53 +++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 25 deletions(-) delete mode 100644 ci/gitlab_runner.sh create mode 100644 ci/scripts/utils/gitlab_runner.sh diff --git a/ci/gitlab_runner.sh b/ci/gitlab_runner.sh deleted file mode 100644 index da69a8c8c71..00000000000 --- a/ci/gitlab_runner.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash - -# Get token from 2nd arg, env, or file -GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} -if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then - if [[ -f gitlab_token ]]; then - source gitlab_token - fi -fi -if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then - echo "ERROR: GITLAB_RUNNER_TOKEN not set" - exit 1 -fi - -URL=https://vlab.noaa.gov/gitlab-licensed 
-BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI - -if [[ ! -f gitlab-runner ]]; then - curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 - chmod +x ./gitlab-runner -fi - -if [[ $1 == "register" ]]; then - ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${URL} --executor shell --builds-dir ${BUILDS_DIR} --shell bash --custom_build_dir-enabled true -fi diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 3fbdfc90240..93bb698bd18 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -19,6 +19,10 @@ export STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR # CI BASH test directories export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI +# CI GitLab test directories +GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed +GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI + export HPC_ACCOUNT=drsa-precip3 export max_concurrent_cases=5 export max_concurrent_pr=4 diff --git a/ci/scripts/utils/gitlab_runner.sh b/ci/scripts/utils/gitlab_runner.sh new file mode 100644 index 00000000000..38571bc5782 --- /dev/null +++ b/ci/scripts/utils/gitlab_runner.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" +host=$(hostname) + +######################################################################### +# Set up runtime environment varibles for accounts on supproted machines +######################################################################### + +source "${HOMEGFS_}/ush/detect_machine.sh" +case ${MACHINE_ID} in + hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) + echo "Launch Jenkins Java Controler on ${MACHINE_ID}";; + noaacloud ) + echo "Launch Jenkins Java Controler on ${PW_CSP}";; + *) + echo "Unsupported platform. Exiting with error." 
+ exit 1;; +esac +source ${HOMEGFS_}/ci/platforms/config.${MACHINE_ID} + +cd ${GITLAB_RUNNER_DIR} + +GITLAB_LOG=launched_gitlab_runner-$(date +%Y%m%d%M).log +rm -f "${LOG}" +echo "Registering Gitlab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" + +# Get token from 2nd arg, env, or file +GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} +if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then + if [[ -f gitlab_token ]]; then + source gitlab_token + fi +fi +if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then + echo "ERROR: GITLAB_RUNNER_TOKEN not set" + exit 1 +fi + +if [[ ! -f gitlab-runner ]]; then + curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 + chmod +x ./gitlab-runner +fi + +if [[ $1 == "register" ]]; then + ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true +fi +if [[ $1 == "run" ]]; then + ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} --group ${USER} --log-level debug +fi +if [[ $1 == "unregister" ]]; then + ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} +fi From 44b653509939d853b21dfcb1057ca8abb93688f4 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 06:38:28 -0400 Subject: [PATCH 15/45] renamed gitlab runner launch script --- .../utils/{gitlab_runner.sh => lanuch_gitlab_runner.sh} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename ci/scripts/utils/{gitlab_runner.sh => lanuch_gitlab_runner.sh} (93%) diff --git a/ci/scripts/utils/gitlab_runner.sh b/ci/scripts/utils/lanuch_gitlab_runner.sh similarity index 93% rename from ci/scripts/utils/gitlab_runner.sh rename to ci/scripts/utils/lanuch_gitlab_runner.sh index 38571bc5782..a43e8dd32f5 100644 --- a/ci/scripts/utils/gitlab_runner.sh +++ b/ci/scripts/utils/lanuch_gitlab_runner.sh @@ -10,9 +10,9 @@ host=$(hostname) source 
"${HOMEGFS_}/ush/detect_machine.sh" case ${MACHINE_ID} in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) - echo "Launch Jenkins Java Controler on ${MACHINE_ID}";; + echo "Launch GitLab Runner on ${MACHINE_ID}";; noaacloud ) - echo "Launch Jenkins Java Controler on ${PW_CSP}";; + echo "Launch GitLab Runner on ${PW_CSP}";; *) echo "Unsupported platform. Exiting with error." exit 1;; From 9bc9f78b1b752faf79e577cc1223237847618799 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 06:40:41 -0400 Subject: [PATCH 16/45] first working create job - stubbed run --- ci/.gitlab-ci.yml | 50 +++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 5d73182c1b6..d738d557817 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -4,21 +4,12 @@ stages: - run_tests variables: - # Use a date-based unique identifier in the build path for nightly builds - # Format: YYYY-MM-DD (year-month-day) - BUILD_DATE: '$(date +%Y-%m-%d)' - # Fixed workspace path with date-based identifier to ensure each nightly build gets its own workspace + BUILD_DATE: 'friday' GIT_CLONE_PATH: '$CI_BUILDS_DIR/global-workflow-${BUILD_DATE}' - HOMEgfs: $GIT_CLONE_PATH - # Add GIT_DEPTH to speed up cloning + HOMEGFS: $GIT_CLONE_PATH + RUNTESTS_DIR: $CI_BUILDS_DIR/RUNTESTS GIT_DEPTH: 1 -# Define a cache to preserve the workspace between jobs -cache: - key: ${CI_COMMIT_REF_SLUG}-${BUILD_DATE} - paths: - - $GIT_CLONE_PATH - build: variables: GIT_STRATEGY: clone @@ -26,10 +17,8 @@ build: stage: build script: - - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - - export CI_PROJECT_DIR=$GIT_CLONE_PATH - - export HASH=$(git rev-parse HEAD) # Get git hash from the clone - - echo $HASH > .githash # Save hash to file + - 'echo "Using build directory: ${HOMEGFS} (dated: ${BUILD_DATE})"' + - export CI_PROJECT_DIR=$HOMEGFS - ci/scripts/utils/ci_utils_wrapper.sh build_compute - 
sorc/link_workflow.sh parallel: @@ -37,23 +26,19 @@ build: - MACHINE: ["gaeac6"] tags: - ${MACHINE} - artifacts: - paths: - - .githash # Pass git hash to next stage - expire_in: 1 hour setup: variables: - # Don't re-clone, just use the existing workspace GIT_STRATEGY: none stage: create_experments script: - - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - - export CI_PROJECT_DIR=$GIT_CLONE_PATH - - export HASH=$(cat .githash) # Retrieve git hash from artifact - - RUNTESTS_DIR="${CI_BUILDS_DIR}/RUNTESTS-${BUILD_DATE}" + - 'echo "Using build directory: $HOMEGFS (dated: ${BUILD_DATE})"' + - export CI_PROJECT_DIR=$HOMEGFS + - cd ${HOMEGFS}/workflow + - HASH=$(git rev-parse HEAD) - mkdir -p ${RUNTESTS_DIR} - - ${HOMEgfs}/workflow/generate_workflow.sh -G -t ${HASH} -D ${RUNTESTS_DIR} + - echo ${PWD} + - ./generate_workflows.sh -G -t ${HASH} ${RUNTESTS_DIR} parallel: matrix: - MACHINE: ["gaeac6"] @@ -64,18 +49,19 @@ setup: run_tests: variables: - # Don't re-clone, just use the existing workspace GIT_STRATEGY: none stage: run_tests script: - - echo "Using build directory: $GIT_CLONE_PATH (dated: ${BUILD_DATE})" - - export CI_PROJECT_DIR=$GIT_CLONE_PATH - - source ${HOMEgfs}/workflow/gw_setup.sh - - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOMEgfs} ${pslot} ${CI_PROJECT_NAME} --get_pslot_list + - 'echo "Using build directory: $HOMEGFS (dated: ${BUILD_DATE})"' + - echo ${PWD} + - export CI_PROJECT_DIR=$HOMEGFS + - echo ${PWD} + - echo "stubbing run-check" + #- ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} --get_pslot_list parallel: matrix: - MACHINE: ["gaeac6"] tags: - ${MACHINE} dependencies: - - setup \ No newline at end of file + - setup From 67deb8d73b97ddad0351fcfb6d75fbc1aa3b61dc Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 08:28:48 -0400 Subject: [PATCH 17/45] using fixed list of cases in matrix and using create_experment with ci utils wrapper --- ci/.gitlab-ci.yml | 23 
++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index d738d557817..fd2e06ca8e1 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -1,6 +1,6 @@ stages: - build - - create_experments + - create_experiments - run_tests variables: @@ -17,10 +17,11 @@ build: stage: build script: - - 'echo "Using build directory: ${HOMEGFS} (dated: ${BUILD_DATE})"' - export CI_PROJECT_DIR=$HOMEGFS + - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh + - mkdir -p ${RUNTESTS_DIR} parallel: matrix: - MACHINE: ["gaeac6"] @@ -30,18 +31,13 @@ build: setup: variables: GIT_STRATEGY: none - stage: create_experments + stage: create_experiments script: - - 'echo "Using build directory: $HOMEGFS (dated: ${BUILD_DATE})"' - - export CI_PROJECT_DIR=$HOMEGFS - - cd ${HOMEGFS}/workflow - - HASH=$(git rev-parse HEAD) - - mkdir -p ${RUNTESTS_DIR} - - echo ${PWD} - - ./generate_workflows.sh -G -t ${HASH} ${RUNTESTS_DIR} + - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName} parallel: matrix: - MACHINE: ["gaeac6"] + - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", "C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] tags: - ${MACHINE} dependencies: @@ -52,10 +48,7 @@ run_tests: GIT_STRATEGY: none stage: run_tests script: - - 'echo "Using build directory: $HOMEGFS (dated: ${BUILD_DATE})"' - - echo ${PWD} - - export CI_PROJECT_DIR=$HOMEGFS - - echo ${PWD} + - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - echo "stubbing run-check" #- ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} --get_pslot_list parallel: @@ -64,4 +57,4 @@ run_tests: tags: - ${MACHINE} dependencies: - - setup + - 
setup_generate_workflows From a7482fa0d456a33230566fe15d091a1311805119 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 12:08:29 -0400 Subject: [PATCH 18/45] added mech to skip cases in gitlab pipeline per host --- ci/.gitlab-ci.yml | 23 ++++++++++++------- ...tlab_runner.sh => launch_gitlab_runner.sh} | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) rename ci/scripts/utils/{lanuch_gitlab_runner.sh => launch_gitlab_runner.sh} (92%) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index fd2e06ca8e1..a34358e81ce 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -22,24 +22,31 @@ build: - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh - mkdir -p ${RUNTESTS_DIR} + - ${HOMEGFS}/ci/scripts/utils/git_host_case_list.py $MACHINE > cases_on_host.txt parallel: matrix: - MACHINE: ["gaeac6"] + artifacts: + paths: + - cases_on_host.txt tags: - ${MACHINE} -setup: + +setup_experiments: variables: GIT_STRATEGY: none stage: create_experiments script: - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName} + rules: + script: + - grep -q ${caseName} cases_on_host.txt parallel: matrix: - - MACHINE: ["gaeac6"] - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", "C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] tags: - - ${MACHINE} + - gaeac6 dependencies: - build @@ -49,12 +56,12 @@ run_tests: stage: run_tests script: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - - echo "stubbing run-check" - #- ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} --get_pslot_list + - pslot=${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName} + - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} parallel: matrix: - - MACHINE: 
["gaeac6"] + - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", "C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] tags: - - ${MACHINE} + - gaeac6 dependencies: - - setup_generate_workflows + - setup_experiments diff --git a/ci/scripts/utils/lanuch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh similarity index 92% rename from ci/scripts/utils/lanuch_gitlab_runner.sh rename to ci/scripts/utils/launch_gitlab_runner.sh index a43e8dd32f5..eb263e7cffc 100644 --- a/ci/scripts/utils/lanuch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -46,7 +46,7 @@ if [[ $1 == "register" ]]; then ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true fi if [[ $1 == "run" ]]; then - ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} --group ${USER} --log-level debug + nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} --group ${USER} --log-level debug >> ${GITLAB_LOG} 2>&1 & fi if [[ $1 == "unregister" ]]; then ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} From 7c1c2c4dfdc079919e1a30de043a2962e70a26c1 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 12:18:35 -0400 Subject: [PATCH 19/45] couple of small changes --- ci/.gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index a34358e81ce..50c601aedf0 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -19,7 +19,7 @@ build: script: - export CI_PROJECT_DIR=$HOMEGFS - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - - ci/scripts/utils/ci_utils_wrapper.sh build_compute + #- ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh - mkdir -p ${RUNTESTS_DIR} 
- ${HOMEGFS}/ci/scripts/utils/git_host_case_list.py $MACHINE > cases_on_host.txt @@ -28,7 +28,7 @@ build: - MACHINE: ["gaeac6"] artifacts: paths: - - cases_on_host.txt + - cases_on_host.txt tags: - ${MACHINE} @@ -38,7 +38,7 @@ setup_experiments: GIT_STRATEGY: none stage: create_experiments script: - - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName} + - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName}.py rules: script: - grep -q ${caseName} cases_on_host.txt @@ -56,7 +56,7 @@ run_tests: stage: run_tests script: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - - pslot=${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName} + - pslot=$(${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} parallel: matrix: From ed34030693094a30f8aaf2f0b2408f99078e0d07 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Tue, 18 Mar 2025 12:22:16 -0400 Subject: [PATCH 20/45] added skip rule in run tests --- ci/.gitlab-ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 50c601aedf0..1c7ab96c198 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -58,6 +58,9 @@ run_tests: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - pslot=$(${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} + rules: + script: + - grep -q ${caseName} cases_on_host.txt parallel: matrix: - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", "C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] From 
717cf22bdffaa1a744915b69e8a651f929049996 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Fri, 21 Mar 2025 21:51:40 -0400 Subject: [PATCH 21/45] hard coded skip hosts for Gaea C6 and increased timeouts --- ci/.gitlab-ci.yml | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 1c7ab96c198..ef2b1eec820 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -9,6 +9,8 @@ variables: HOMEGFS: $GIT_CLONE_PATH RUNTESTS_DIR: $CI_BUILDS_DIR/RUNTESTS GIT_DEPTH: 1 + RUNNER_SCRIPT_TIMEOUT: 6h + RUNNER_AFTER_SCRIPT_TIMEOUT: 6h build: variables: @@ -19,16 +21,19 @@ build: script: - export CI_PROJECT_DIR=$HOMEGFS - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - #- ci/scripts/utils/ci_utils_wrapper.sh build_compute + - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh - mkdir -p ${RUNTESTS_DIR} - - ${HOMEGFS}/ci/scripts/utils/git_host_case_list.py $MACHINE > cases_on_host.txt + # NEED skip_ci_on_hosts to work (skipping for now) + # - source ${HOMEGFS}/workflow/gw_setup.sh + # - cases_on_host=$(${HOMEGFS}/ci/scripts/utils/get_host_case_list.py $MACHINE) + # - echo "cases_on_host=$cases_on_host" >> variables.env parallel: matrix: - MACHINE: ["gaeac6"] - artifacts: - paths: - - cases_on_host.txt + #artifacts: + # reports: + # dotenv: variables.env tags: - ${MACHINE} @@ -38,13 +43,12 @@ setup_experiments: GIT_STRATEGY: none stage: create_experiments script: - - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName}.py - rules: - script: - - grep -q ${caseName} cases_on_host.txt + - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName}.yaml + #rules: + # - if: '$cases_on_host =~ /$caseName/' parallel: matrix: - - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", 
"C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] + - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] tags: - gaeac6 dependencies: @@ -57,13 +61,12 @@ run_tests: script: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - pslot=$(${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${HOMEGFS} ${pslot} global-workflow-${BUILD_DATE} - rules: - script: - - grep -q ${caseName} cases_on_host.txt + - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${CI_BUILDS_DIR} ${pslot} global-workflow-${BUILD_DATE} + #rules: + # - if: '$cases_on_host =~ /$caseName/' parallel: matrix: - - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW_extended", "C48_S2SW", "C96_atm3DVar_extended", "C96_atm3DVar", "C96C48_hybatmaerosnowDA", "C96C48_hybatmDA", "C96C48_ufs_hybatmDA", "C96mx100_S2S"] + - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] tags: - gaeac6 dependencies: From 490de9b48588aa595a3b9b813fd5d65e6fbb0d48 Mon Sep 17 00:00:00 2001 From: "Terry.McGuinnes" Date: Fri, 21 Mar 2025 22:15:44 -0400 Subject: [PATCH 22/45] added concurrancy to gitlab launcher --- ci/scripts/utils/launch_gitlab_runner.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) mode change 100644 => 100755 ci/scripts/utils/launch_gitlab_runner.sh diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh old mode 100644 new mode 100755 index eb263e7cffc..21114a1ca99 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -10,9 +10,9 @@ host=$(hostname) source "${HOMEGFS_}/ush/detect_machine.sh" case ${MACHINE_ID} in hera | orion | 
hercules | wcoss2 | gaeac5 | gaeac6 ) - echo "Launch GitLab Runner on ${MACHINE_ID}";; + echo "Launching GitLab Runner on ${MACHINE_ID}";; noaacloud ) - echo "Launch GitLab Runner on ${PW_CSP}";; + echo "Launching GitLab Runner on ${PW_CSP}";; *) echo "Unsupported platform. Exiting with error." exit 1;; @@ -43,10 +43,12 @@ if [[ ! -f gitlab-runner ]]; then fi if [[ $1 == "register" ]]; then - ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true + ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true --request-concurrency 24 + sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml fi if [[ $1 == "run" ]]; then - nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} --group ${USER} --log-level debug >> ${GITLAB_LOG} 2>&1 & + echo "Running gitlab-runner with nohup see log ${PWD}/${GITLAB_LOG}" + nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} >> ${GITLAB_LOG} 2>&1 & fi if [[ $1 == "unregister" ]]; then ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} From 0dce55acbbf9e7b25604d6dc48260b00fb3fcd3c Mon Sep 17 00:00:00 2001 From: "Terry.McGuinnes" Date: Fri, 21 Mar 2025 22:49:43 -0400 Subject: [PATCH 23/45] debugged inplace gitlat runner launch script --- ci/platforms/config.gaeac6 | 1 + ci/scripts/utils/launch_gitlab_runner.sh | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 93bb698bd18..ac4b8f5c897 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -22,6 +22,7 @@ export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI # CI GitLab test directories GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed 
GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI +GITLAB_RUNNER_DIR=${GFS_CI_ROOT}/GitLab/Runner export HPC_ACCOUNT=drsa-precip3 export max_concurrent_cases=5 diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 21114a1ca99..01fe45dcd6a 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -22,8 +22,10 @@ source ${HOMEGFS_}/ci/platforms/config.${MACHINE_ID} cd ${GITLAB_RUNNER_DIR} GITLAB_LOG=launched_gitlab_runner-$(date +%Y%m%d%M).log +GITLAB_RUNNER_NAME="RDHPCS Gaea C6" rm -f "${LOG}" echo "Registering Gitlab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" +echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" # Get token from 2nd arg, env, or file GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} @@ -47,8 +49,11 @@ if [[ $1 == "register" ]]; then sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml fi if [[ $1 == "run" ]]; then - echo "Running gitlab-runner with nohup see log ${PWD}/${GITLAB_LOG}" - nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR} --user ${USER} >> ${GITLAB_LOG} 2>&1 & + COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR}" + # --user ${USER}" + echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${PWD}/${GITLAB_LOG}" + nohup $COMMAND >> ${GITLAB_LOG} 2>&1 & + cat ${GITLAB_LOG} fi if [[ $1 == "unregister" ]]; then ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} From 64574f5105e676e9d21dcffab66106d1e0f2cdad Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Fri, 21 Mar 2025 23:57:51 -0400 Subject: [PATCH 24/45] deleted work temp files --- ci/config.toml | 24 ------------------------ ci/docker-compose.yml | 21 --------------------- ci/error.sh | 13 ------------- 3 files changed, 58 deletions(-) delete mode 100644 ci/config.toml delete mode 100644 ci/docker-compose.yml delete mode 100644 ci/error.sh 
diff --git a/ci/config.toml b/ci/config.toml deleted file mode 100644 index 2e8246357ad..00000000000 --- a/ci/config.toml +++ /dev/null @@ -1,24 +0,0 @@ -concurrent = 1 -check_interval = 0 -shutdown_timeout = 0 - -builds_dir = "/home/tmcguinness/GITLAB/RUNNER/CI" - -[session_server] - session_timeout = 1800 - -[[runners]] - name = "Orion-login-1.HPC.MsState.Edu" - url = "http://localhost:8929" - id = 1 - token = "token_redacted" - token_obtained_at = 2025-03-18T21:32:07Z - token_expires_at = 0001-01-01T00:00:00Z - executor = "shell" - [runners.cache] - MaxUploadedArchiveSize = 0 - [runners.cache.s3] - [runners.cache.gcs] - [runners.cache.azure] - [runners.custom_build_dir] - enabled = true diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml deleted file mode 100644 index f3f612c63c0..00000000000 --- a/ci/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -services: - gitlab: - image: gitlab/gitlab-ce:17.6.4-ce.0 - container_name: gitlab - restart: always - hostname: 'localhost' - environment: - GITLAB_OMNIBUS_CONFIG: | - # Add any other gitlab.rb configuration here, each on its own line - external_url 'http://localhost:8929' - ports: - - '8929:8929' - - '443:443' - - '2424:22' - volumes: - - '/home/tmcguinness/GITLAB/config:/etc/gitlab' - - '/home/tmcguinness/GITLAB/logs:/var/log/gitlab' - - '/home/tmcguinness/GITLAB/data:/var/opt/gitlab' - shm_size: '256m' - user: "${UID}:${GID}" - diff --git a/ci/error.sh b/ci/error.sh deleted file mode 100644 index 63743b3da4f..00000000000 --- a/ci/error.sh +++ /dev/null @@ -1,13 +0,0 @@ -$ echo ${RUNTESTS_DIR} -/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/RUNTESTS -$ echo ${HASH} -78c89a111902f81bdd3c8d51c1ca05ebdf7c5af5 -$ mkdir -p ${RUNTESTS_DIR} -$ ${HOMEgfs}/workflow/generate_workflows.sh -G -t ${HASH} ${RUNTESTS_DIR} -The RUNTESTS directory /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/RUNTESTS already exists. -Would you like to remove it? 
-Running all GFS cases in /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/ci/cases/pr -Begin link_workflow.sh at Thu 20 Mar 2025 06:18:41 AM UTC -/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/sorc/link_workflow.sh: line 59: /gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI/9924/global-workflow/sorc/gfs_utils.fd/ush/detect_machine.sh: No such file or directory -End link_workflow.sh at 06:18:41 with error code 1 (time elapsed: 00:00:00) -link_workflow.sh failed! \ No newline at end of file From 2e8e502922012d543bc8c3dee7e4e78450d382ce Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Sat, 22 Mar 2025 00:00:56 -0400 Subject: [PATCH 25/45] reverting to org run-check ci script --- ci/scripts/run-check_ci.sh | 145 ++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 81 deletions(-) diff --git a/ci/scripts/run-check_ci.sh b/ci/scripts/run-check_ci.sh index 87e4d9d475b..5c49a21c4ba 100755 --- a/ci/scripts/run-check_ci.sh +++ b/ci/scripts/run-check_ci.sh @@ -9,14 +9,7 @@ set -eu TEST_DIR=${1:-${TEST_DIR:-?}} # Location of the root of the testing directory pslot=${2:-${pslot:-?}} # Name of the experiment being tested by this script -SYSTEM_BUILD_DIR=${3:-"global-workflow"} # Name of the system build directory, default is "global-workflow" -GET_PSLOTS=${4:-"false"} # Flag to get the list of pslots from the the directory pslot - -# Check for usage -if [[ $# -lt 2 || $# -gt 4 ]]; then - echo "Usage: $0 [SYSTEM_BUILD_DIR] [--get_pslot_list]" - exit 1 -fi +SYSTEM_BUILD_DIR=${3:-"global-workflow"} # Name of the system build directory, default is "global-workflow # TEST_DIR contains 2 directories; # 1. 
HOMEgfs: clone of the global-workflow @@ -26,7 +19,7 @@ fi # ├── HOMEgfs # └── RUNTESTS # ├── COMROOT -# │ └── ${pslot} +# │   └── ${pslot} # └── EXPDIR # └── ${pslot} # Two system build directories created at build time gfs, and gdas @@ -39,91 +32,80 @@ run_check_logfile="${RUNTESTS}/ci-run_check.log" echo "Source modules." source "${HOMEgfs}/workflow/gw_setup.sh" -if [[ "${GET_PSLOTS}" == "--get_pslot_list" ]]; then - pslot_list=$("${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh" get_pslot_list "${RUNTESTS}") -else - pslot_list=(${pslot}) -fi -echo "Experment being ran with rocotorun and rocotostat.py: ${pslot_list}" +# cd into the experiment directory +echo "cd ${RUNTESTS}/EXPDIR/${pslot}" +cd "${RUNTESTS}/EXPDIR/${pslot}" || (echo "FATAL ERROR: Unable to cd into '${RUNTESTS}/EXPDIR/${pslot}', ABORT!"; exit 1) -for pslot in ${pslot_list[@]}; do +# Name of the Rocoto XML and database files +xml="${pslot}.xml" +db="${pslot}.db" - # cd into the experiment directory - echo "cd ${RUNTESTS}/EXPDIR/${pslot}" - cd "${RUNTESTS}/EXPDIR/${pslot}" || (echo "FATAL ERROR: Unable to cd into '${RUNTESTS}/EXPDIR/${pslot}', ABORT!"; exit 1) +# Ensure the XML is present for the experiment +if [[ ! -f "${xml}" ]]; then + echo "FATAL ERROR: XML file ${xml} not found in '${pslot}', experiment ${pslot} failed, ABORT!" + exit 1 +fi - # Name of the Rocoto XML and database files - xml="${pslot}.xml" - db="${pslot}.db" +# Launch experiment +echo "Launch experiment with Rocoto." +rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" +sleep 10 +if [[ ! -f "${db}" ]]; then + echo "FATAL ERROR: Rocoto database file ${db} not found, experiment ${pslot} failed, ABORT!" + exit 2 +fi - # Ensure the XML is present for the experiment - if [[ ! -f "${xml}" ]]; then - echo "FATAL ERROR: XML file ${xml} not found in '${pslot}', experiment ${pslot} failed, ABORT!" - exit 1 - fi +# Experiment launched +rc=99 +set +e +while true; do - # Launch experiment - echo "Launch experiment with Rocoto." 
+ echo "Run rocotorun." rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" - sleep 10 - if [[ ! -f "${db}" ]]; then - echo "FATAL ERROR: Rocoto database file ${db} not found, experiment ${pslot} failed, ABORT!" - exit 2 - fi - # Experiment launched - rc=99 - set +e - while true; do + # Wait before running rocotostat + sleep 60 - echo "Run rocotorun." - rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}" + # Get job statistics + echo "Gather Rocoto statistics" + # shellcheck disable=SC2312 # We want to use the exit code of the command + full_state=$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}" -v) + error_stat=$? - # Wait before running rocotostat - sleep 60 - - # Get job statistics - echo "Gather Rocoto statistics" - # shellcheck disable=SC2312 # We want to use the exit code of the command - full_state=$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}" -v) - error_stat=$? - - for state in CYCLES_TOTAL CYCLES_DONE SUCCEEDED FAIL DEAD; do - declare "${state}"="$(echo "${full_state}" | grep "${state}" | cut -d: -f2)" || true - done - ROCOTO_STATE=$(echo "${full_state}" | tail -1) || exit 1 + for state in CYCLES_TOTAL CYCLES_DONE SUCCEEDED FAIL DEAD; do + declare "${state}"="$(echo "${full_state}" | grep "${state}" | cut -d: -f2)" || true + done + ROCOTO_STATE=$(echo "${full_state}" | tail -1) || exit 1 - echo -e "(${pslot} on ${MACHINE_ID^})\n\tTotal Cycles: ${CYCLES_TOTAL}\n\tNumber Cycles done: ${CYCLES_DONE}\n\tState: ${ROCOTO_STATE}" + echo -e "(${pslot} on ${MACHINE_ID^})\n\tTotal Cycles: ${CYCLES_TOTAL}\n\tNumber Cycles done: ${CYCLES_DONE}\n\tState: ${ROCOTO_STATE}" - if [[ ${error_stat} -ne 0 ]]; then - { - echo "Experiment ${pslot} Terminated with ${FAIL} tasks failed and ${DEAD} dead at $(date)" || true - echo "Experiment ${pslot} Terminated: *${ROCOTO_STATE}*" - } | tee -a "${run_check_logfile}" - if [[ "${DEAD}" -ne 0 ]]; then - error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk 
'{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true - { - echo "Error logs:" - echo "${error_logs}" - } | tee -a "${run_check_logfile}" - rm -f "${RUNTESTS}/${pslot}_error.logs" - for log in ${error_logs}; do - echo "RUNTESTS${log#*RUNTESTS}" >> "${RUNTESTS}/${pslot}_error.logs" - done - fi - rc=1 - break - fi - - if [[ "${ROCOTO_STATE}" == "DONE" ]]; then + if [[ ${error_stat} -ne 0 ]]; then + { + echo "Experiment ${pslot} Terminated with ${FAIL} tasks failed and ${DEAD} dead at $(date)" || true + echo "Experiment ${pslot} Terminated: *${ROCOTO_STATE}*" + } | tee -a "${run_check_logfile}" + if [[ "${DEAD}" -ne 0 ]]; then + error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true { - echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles: *SUCCESS* at $(date)" || true - } | tee -a "${run_check_logfile}" - rc=0 - break - fi + echo "Error logs:" + echo "${error_logs}" + } | tee -a "${run_check_logfile}" + rm -f "${RUNTESTS}/${pslot}_error.logs" + for log in ${error_logs}; do + echo "RUNTESTS${log#*RUNTESTS}" >> "${RUNTESTS}/${pslot}_error.logs" + done + fi + rc=1 + break + fi - done + if [[ "${ROCOTO_STATE}" == "DONE" ]]; then + { + echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles: *SUCCESS* at $(date)" || true + } | tee -a "${run_check_logfile}" + rc=0 + break + fi # Wait before running rocotorun again sleep 300 @@ -131,3 +113,4 @@ for pslot in ${pslot_list[@]}; do done exit "${rc}" + From 6d2c0751ef8af0d7e65d38112172a80f8ab2c66d Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Sat, 22 Mar 2025 00:52:57 -0400 Subject: [PATCH 26/45] synced submodules --- sorc/verif-global.fd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sorc/verif-global.fd b/sorc/verif-global.fd index b2ee80cac79..ecb31f4575d 160000 --- a/sorc/verif-global.fd +++ 
b/sorc/verif-global.fd @@ -1 +1 @@ -Subproject commit b2ee80cac7921a3016fa5a857cc58acfccc4baea +Subproject commit ecb31f4575d86e9ce50495682adb550992ccfc6a From bfc9c90668c915a3834ae2e6ada6724c83a98eb7 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 11:19:59 -0400 Subject: [PATCH 27/45] add inline bash documentation to explain the specifics for starting and setting up a GitLab runner to connect to the GitLab server in VLab --- ci/platforms/config.gaeac6 | 62 ++++++++++++++++-- ci/scripts/utils/launch_gitlab_runner.sh | 80 ++++++++++++++++++++++-- 2 files changed, 132 insertions(+), 10 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index ac4b8f5c897..f7601733b0d 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -1,29 +1,79 @@ #!/usr/bin/bash -# Main CI root directory +######################################################################### +# config.gaeac6 - Platform-specific configuration for Gaea C6 +# +# This file contains environment variables used by CI/CD scripts, +# including the launch_gitlab_runner.sh script. It defines paths, +# directories, and settings specific to the Gaea C6 platform. 
+######################################################################### + +# Main CI root directory - Base directory for all CI/CD operations export GFS_CI_ROOT=/ncrc/proj/nggps_emc/${USER}/GFS_CI_CD -# ICSDIR root directory used on the create_experment.py command line + +# ICSDIR root directory - Contains initial condition data +# Used by create_experiment.py for setting up test cases export ICSDIR_ROOT=/gpfs/f6/bil-fire8/world-shared/global/glopara/data/ICSDIR -# JENKINS launch directory for agent +######################################################################### +# Jenkins-specific configuration settings +######################################################################### + +# JENKINS launch directory for agent - Where Jenkins agents are launched from export JENKINS_AGENT_LAUNCH_DIR=${GFS_CI_ROOT}/Jenkins/agent + # JENKINS internal working directories for CI jobs (not for users use) +# Where Jenkins stores temporary files during CI job execution export JENKINS_WORK_DIR=${GFS_CI_ROOT}/Jenkins/workspace + # NOTE: JENKINS custom_workspace directory where CI jobs are run # /gpfs/f6/drsa-precip3/proj-shared/global/CI # is defined in the Jenkinsfile -# CTest functional test directories for pre stagged input data +######################################################################### +# Test and CI directories +######################################################################### + +# CTest functional test directories for pre-staged input data +# Contains data needed for functional tests export STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR -# CI BASH test directories +# CI BASH test directories - Used for bash script testing export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI -# CI GitLab test directories +######################################################################### +# GitLab CI configuration +# These variables are referenced directly by launch_gitlab_runner.sh 
+######################################################################### + +# GitLab URL for the CI +# Used in the 'register' step of launch_gitlab_runner.sh for --url parameter GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed + +# Directory for GitLab builds +# Used in launch_gitlab_runner.sh for: +# 1. --builds-dir parameter during 'register' +# 2. --working-directory parameter during 'run' GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI + +# Directory for GitLab runner +# Used by launch_gitlab_runner.sh as the directory to: +# 1. Store the gitlab-runner binary +# 2. Execute the runner from +# 3. Write log files to GITLAB_RUNNER_DIR=${GFS_CI_ROOT}/GitLab/Runner +######################################################################### +# HPC and parallel execution settings +######################################################################### + +# HPC account information - For job submission export HPC_ACCOUNT=drsa-precip3 + +# Maximum number of concurrent cases that can run simultaneously +# Helps prevent overloading the system export max_concurrent_cases=5 + +# Maximum number of concurrent pull requests +# Limits the number of PR builds running at the same time export max_concurrent_pr=4 diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 01fe45dcd6a..6b900524e7c 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -1,13 +1,28 @@ #!/usr/bin/env bash +######################################################################### +# launch_gitlab_runner.sh - Script to manage GitLab runners for CI/CD +# +# This script handles three main operations for GitLab runners: +# 1. register - Registers a new GitLab runner with the GitLab server +# 2. run - Starts a GitLab runner in the background +# 3. 
unregister - Removes a GitLab runner from the GitLab server +# +# Usage: ./launch_gitlab_runner.sh [register|run|unregister] [token] +######################################################################### + +# Set the HOMEGFS_ variable to the root directory of the global workflow HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" +# Get the hostname of the current machine host=$(hostname) ######################################################################### -# Set up runtime environment varibles for accounts on supproted machines +# Set up runtime environment variables for accounts on supported machines ######################################################################### +# Source the detect_machine.sh script to determine the MACHINE_ID source "${HOMEGFS_}/ush/detect_machine.sh" +# Check the MACHINE_ID and set up the environment accordingly case ${MACHINE_ID} in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) echo "Launching GitLab Runner on ${MACHINE_ID}";; @@ -17,17 +32,35 @@ case ${MACHINE_ID} in echo "Unsupported platform. Exiting with error." 
exit 1;; esac + +# Source the platform-specific configuration file +# This file contains platform-specific variables such as GITLAB_URL, GITLAB_BUILDS_DIR, +# and GITLAB_RUNNER_DIR which are required for runner registration and execution +# See config.gaeac6 for details on these variables source ${HOMEGFS_}/ci/platforms/config.${MACHINE_ID} +# Change to the GitLab runner directory defined in the platform config cd ${GITLAB_RUNNER_DIR} +# Set the log file name with the current date and time GITLAB_LOG=launched_gitlab_runner-$(date +%Y%m%d%M).log +# Set the GitLab runner name - this name will appear in the GitLab UI GITLAB_RUNNER_NAME="RDHPCS Gaea C6" +# Remove any existing log file rm -f "${LOG}" -echo "Registering Gitlab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" +# Log the registration details +echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" -# Get token from 2nd arg, env, or file +######################################################################### +# GitLab Token Handling +# The token is used to authenticate the runner with the GitLab server +######################################################################### + +# Get the GitLab runner token from: +# 1. The second command-line argument +# 2. The GITLAB_RUNNER_TOKEN environment variable +# 3. A gitlab_token file in the current directory GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then if [[ -f gitlab_token ]]; then @@ -39,22 +72,61 @@ if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then exit 1 fi +# Download the GitLab runner binary if it does not exist if [[ ! 
-f gitlab-runner ]]; then curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 chmod +x ./gitlab-runner fi +######################################################################### +# REGISTER argument handling +# Registers a new GitLab runner with the GitLab server +######################################################################### + if [[ $1 == "register" ]]; then + # Register the GitLab runner with the following parameters: + # -n: Run in non-interactive mode + # -t: Registration token from GitLab + # --url: URL of the GitLab server (from config.gaeac6) + # --executor: Type of executor (shell in this case) + # --shell: Shell to use for job execution + # --builds-dir: Directory where builds will be stored (from config.gaeac6) + # --custom_build_dir-enabled: Enable custom build directories + # --request-concurrency: Number of concurrent requests that can be handled ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true --request-concurrency 24 + + # Set the concurrent job limit in the GitLab runner config file sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml fi + +######################################################################### +# RUN argument handling +# Starts a GitLab runner in the background +######################################################################### + if [[ $1 == "run" ]]; then + # Construct the command to run the GitLab runner + # nohup: Run the command immune to hangups + # --working-directory: Directory where the runner will store its working files (from config.gaeac6) COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR}" - # --user ${USER}" + # --user ${USER}" # This line is commented out in the original script + + # Print the command and log file location echo -e "Running gitlab-runner with the 
command:\n${COMMAND}\nsee log ${PWD}/${GITLAB_LOG}" + + # Run the command in the background and redirect output to the log file nohup $COMMAND >> ${GITLAB_LOG} 2>&1 & + + # Display the current contents of the log file cat ${GITLAB_LOG} fi + +######################################################################### +# UNREGISTER argument handling +# Removes a GitLab runner from the GitLab server +######################################################################### + if [[ $1 == "unregister" ]]; then + # Unregister the GitLab runner by name ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} fi From c8a5b026ee4e6a5a69a677afbbd6ccd87c667eda Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 11:52:54 -0400 Subject: [PATCH 28/45] changed BUILD tag on name of dir for build to today instead of friday --- ci/.gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index ef2b1eec820..f20392d1473 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -4,7 +4,7 @@ stages: - run_tests variables: - BUILD_DATE: 'friday' + BUILD_DATE: 'today' GIT_CLONE_PATH: '$CI_BUILDS_DIR/global-workflow-${BUILD_DATE}' HOMEGFS: $GIT_CLONE_PATH RUNTESTS_DIR: $CI_BUILDS_DIR/RUNTESTS From 081984d0b2e725ea077408c6de02655b104409b3 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 11:58:47 -0400 Subject: [PATCH 29/45] needed to add export to added values in config file for Gaea C6 --- ci/platforms/config.gaeac6 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index f7601733b0d..f73d2912fc1 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -48,20 +48,20 @@ export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI # GitLab URL for the CI # Used in the 'register' step of launch_gitlab_runner.sh for --url parameter -GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed +export 
GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed # Directory for GitLab builds # Used in launch_gitlab_runner.sh for: # 1. --builds-dir parameter during 'register' # 2. --working-directory parameter during 'run' -GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI +export GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI # Directory for GitLab runner # Used by launch_gitlab_runner.sh as the directory to: # 1. Store the gitlab-runner binary # 2. Execute the runner from # 3. Write log files to -GITLAB_RUNNER_DIR=${GFS_CI_ROOT}/GitLab/Runner +export GITLAB_RUNNER_DIR=${GFS_CI_ROOT}/GitLab/Runner ######################################################################### # HPC and parallel execution settings From 5d4826b3d7aaaf65c92860bbe566ca4f713c3280 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 12:06:09 -0400 Subject: [PATCH 30/45] added double quote shell norms --- ci/platforms/config.gaeac6 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index f73d2912fc1..6931b177dad 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -9,7 +9,7 @@ ######################################################################### # Main CI root directory - Base directory for all CI/CD operations -export GFS_CI_ROOT=/ncrc/proj/nggps_emc/${USER}/GFS_CI_CD +export GFS_CI_ROOT="/ncrc/proj/nggps_emc/${USER}/GFS_CI_CD" # ICSDIR root directory - Contains initial condition data # Used by create_experiment.py for setting up test cases @@ -20,11 +20,11 @@ export ICSDIR_ROOT=/gpfs/f6/bil-fire8/world-shared/global/glopara/data/ICSDIR ######################################################################### # JENKINS launch directory for agent - Where Jenkins agents are launched from -export JENKINS_AGENT_LAUNCH_DIR=${GFS_CI_ROOT}/Jenkins/agent +export JENKINS_AGENT_LAUNCH_DIR="${GFS_CI_ROOT}/Jenkins/agent" 
# JENKINS internal working directories for CI jobs (not for users use) # Where Jenkins stores temporary files during CI job execution -export JENKINS_WORK_DIR=${GFS_CI_ROOT}/Jenkins/workspace +export JENKINS_WORK_DIR="${GFS_CI_ROOT}/Jenkins/workspace" # NOTE: JENKINS custom_workspace directory where CI jobs are run # /gpfs/f6/drsa-precip3/proj-shared/global/CI @@ -36,10 +36,10 @@ export JENKINS_WORK_DIR=${GFS_CI_ROOT}/Jenkins/workspace # CTest functional test directories for pre-staged input data # Contains data needed for functional tests -export STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR +export STAGED_TESTS_DIR="${GFS_CI_ROOT}/STAGED_TESTS_DIR" # CI BASH test directories - Used for bash script testing -export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI +export GFS_BASH_CI_ROOT="${GFS_CI_ROOT}/GFS_BASH_CI" ######################################################################### # GitLab CI configuration @@ -61,7 +61,7 @@ export GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/ # 1. Store the gitlab-runner binary # 2. Execute the runner from # 3. 
Write log files to -export GITLAB_RUNNER_DIR=${GFS_CI_ROOT}/GitLab/Runner +export GITLAB_RUNNER_DIR="${GFS_CI_ROOT}/GitLab/Runner" ######################################################################### # HPC and parallel execution settings From cf8188fcc1bda3d10693074bfdc6b4ff339066b4 Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 12:06:47 -0400 Subject: [PATCH 31/45] added double quote shell norms 2 --- ci/scripts/utils/launch_gitlab_runner.sh | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 6b900524e7c..0cc90aa7f1b 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -14,7 +14,7 @@ # Set the HOMEGFS_ variable to the root directory of the global workflow HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" # Get the hostname of the current machine -host=$(hostname) +host="$(hostname)" ######################################################################### # Set up runtime environment variables for accounts on supported machines @@ -23,7 +23,7 @@ host=$(hostname) # Source the detect_machine.sh script to determine the MACHINE_ID source "${HOMEGFS_}/ush/detect_machine.sh" # Check the MACHINE_ID and set up the environment accordingly -case ${MACHINE_ID} in +case "${MACHINE_ID}" in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) echo "Launching GitLab Runner on ${MACHINE_ID}";; noaacloud ) @@ -37,13 +37,13 @@ esac # This file contains platform-specific variables such as GITLAB_URL, GITLAB_BUILDS_DIR, # and GITLAB_RUNNER_DIR which are required for runner registration and execution # See config.gaeac6 for details on these variables -source ${HOMEGFS_}/ci/platforms/config.${MACHINE_ID} +source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" # Change to the GitLab runner directory defined in the platform config -cd 
${GITLAB_RUNNER_DIR} +cd "${GITLAB_RUNNER_DIR}" # Set the log file name with the current date and time -GITLAB_LOG=launched_gitlab_runner-$(date +%Y%m%d%M).log +GITLAB_LOG="launched_gitlab_runner-$(date +%Y%m%d%M).log" # Set the GitLab runner name - this name will appear in the GitLab UI GITLAB_RUNNER_NAME="RDHPCS Gaea C6" # Remove any existing log file @@ -61,20 +61,20 @@ echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" # 1. The second command-line argument # 2. The GITLAB_RUNNER_TOKEN environment variable # 3. A gitlab_token file in the current directory -GITLAB_RUNNER_TOKEN=${2:-${GITLAB_RUNNER_TOKEN}} -if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then +GITLAB_RUNNER_TOKEN="${2:-${GITLAB_RUNNER_TOKEN}}" +if [[ -z "${GITLAB_RUNNER_TOKEN}" ]]; then if [[ -f gitlab_token ]]; then source gitlab_token fi fi -if [[ -z ${GITLAB_RUNNER_TOKEN} ]]; then +if [[ -z "${GITLAB_RUNNER_TOKEN}" ]]; then echo "ERROR: GITLAB_RUNNER_TOKEN not set" exit 1 fi # Download the GitLab runner binary if it does not exist if [[ ! 
-f gitlab-runner ]]; then - curl -L --output $PWD/gitlab-runner https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 + curl -L --output "$PWD/gitlab-runner" https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 chmod +x ./gitlab-runner fi @@ -83,7 +83,7 @@ fi # Registers a new GitLab runner with the GitLab server ######################################################################### -if [[ $1 == "register" ]]; then +if [[ "${1}" == "register" ]]; then # Register the GitLab runner with the following parameters: # -n: Run in non-interactive mode # -t: Registration token from GitLab @@ -93,7 +93,7 @@ if [[ $1 == "register" ]]; then # --builds-dir: Directory where builds will be stored (from config.gaeac6) # --custom_build_dir-enabled: Enable custom build directories # --request-concurrency: Number of concurrent requests that can be handled - ./gitlab-runner register -n -t ${GITLAB_RUNNER_TOKEN} --url ${GITLAB_URL} --executor shell --shell bash --builds-dir ${GITLAB_BUILDS_DIR} --custom_build_dir-enabled true --request-concurrency 24 + ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 # Set the concurrent job limit in the GitLab runner config file sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml @@ -104,7 +104,7 @@ fi # Starts a GitLab runner in the background ######################################################################### -if [[ $1 == "run" ]]; then +if [[ "${1}" == "run" ]]; then # Construct the command to run the GitLab runner # nohup: Run the command immune to hangups # --working-directory: Directory where the runner will store its working files (from config.gaeac6) @@ -115,10 +115,10 @@ if [[ $1 == "run" ]]; then echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${PWD}/${GITLAB_LOG}" # Run 
the command in the background and redirect output to the log file - nohup $COMMAND >> ${GITLAB_LOG} 2>&1 & + nohup $COMMAND >> "${GITLAB_LOG}" 2>&1 & # Display the current contents of the log file - cat ${GITLAB_LOG} + cat "${GITLAB_LOG}" fi ######################################################################### @@ -126,7 +126,7 @@ fi # Removes a GitLab runner from the GitLab server ######################################################################### -if [[ $1 == "unregister" ]]; then +if [[ "${1}" == "unregister" ]]; then # Unregister the GitLab runner by name - ./gitlab-runner unregister --name ${GITLAB_RUNNER_NAME} + ./gitlab-runner unregister --name "${GITLAB_RUNNER_NAME}" fi From 7298094a6dd44c27bf7c9374a9f30441ec02b9ae Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 12:10:28 -0400 Subject: [PATCH 32/45] couple of more shell norms --- ci/scripts/utils/launch_gitlab_runner.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 0cc90aa7f1b..ce82495c0cb 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -49,7 +49,7 @@ GITLAB_RUNNER_NAME="RDHPCS Gaea C6" # Remove any existing log file rm -f "${LOG}" # Log the registration details -echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" +echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" || true echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" ######################################################################### @@ -74,7 +74,7 @@ fi # Download the GitLab runner binary if it does not exist if [[ ! 
-f gitlab-runner ]]; then - curl -L --output "$PWD/gitlab-runner" https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 + curl -L --output "${PWD}/gitlab-runner" https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-linux-amd64 chmod +x ./gitlab-runner fi From 3b4d04fb01a61d52920b07c567a6f27bcd39e89e Mon Sep 17 00:00:00 2001 From: Terry McGUinness Date: Mon, 24 Mar 2025 12:48:59 -0400 Subject: [PATCH 33/45] couple of last missed shell norms had to add DATE for date --- ci/scripts/utils/launch_gitlab_runner.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index ce82495c0cb..6feff0e67de 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -e + ######################################################################### # launch_gitlab_runner.sh - Script to manage GitLab runners for CI/CD # @@ -40,16 +42,17 @@ esac source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" # Change to the GitLab runner directory defined in the platform config -cd "${GITLAB_RUNNER_DIR}" +cd "${GITLAB_RUNNER_DIR}" || exit 1 # Set the log file name with the current date and time -GITLAB_LOG="launched_gitlab_runner-$(date +%Y%m%d%M).log" +DATE=$(date +%Y%m%d%M) || true +GITLAB_LOG="launched_gitlab_runner-${DATE}.log" # Set the GitLab runner name - this name will appear in the GitLab UI GITLAB_RUNNER_NAME="RDHPCS Gaea C6" # Remove any existing log file rm -f "${LOG}" # Log the registration details -echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at $(date)" >> "${GITLAB_LOG}" || true +echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at ${DATE}" >> "${GITLAB_LOG}" echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" ######################################################################### @@ 
-115,7 +118,7 @@ if [[ "${1}" == "run" ]]; then echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${PWD}/${GITLAB_LOG}" # Run the command in the background and redirect output to the log file - nohup $COMMAND >> "${GITLAB_LOG}" 2>&1 & + nohup "${COMMAND}" >> "${GITLAB_LOG}" 2>&1 & # Display the current contents of the log file cat "${GITLAB_LOG}" From 4031d6361e531e46a6bfd142a76cd015c8844504 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 25 Mar 2025 11:50:03 -0400 Subject: [PATCH 34/45] add CI to GITLAB_BUILDS_DIR to match GitLab reserved variable names to follow the form GITLAB_ --- ci/platforms/config.gaeac6 | 2 +- ci/scripts/utils/launch_gitlab_runner.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 6931b177dad..83bc166aa52 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -54,7 +54,7 @@ export GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed # Used in launch_gitlab_runner.sh for: # 1. --builds-dir parameter during 'register' # 2.
--working-directory parameter during 'run' -export GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI +export GITLAB_CI_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI # Directory for GitLab runner # Used by launch_gitlab_runner.sh as the directory to: diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 6feff0e67de..32fa22dab6d 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -36,7 +36,7 @@ case "${MACHINE_ID}" in esac # Source the platform-specific configuration file -# This file contains platform-specific variables such as GITLAB_URL, GITLAB_BUILDS_DIR, +# This file contains platform-specific variables such as GITLAB_URL, GITLAB_CI_BUILDS_DIR, # and GITLAB_RUNNER_DIR which are required for runner registration and execution # See config.gaeac6 for details on these variables source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" @@ -96,7 +96,7 @@ if [[ "${1}" == "register" ]]; then # --builds-dir: Directory where builds will be stored (from config.gaeac6) # --custom_build_dir-enabled: Enable custom build directories # --request-concurrency: Number of concurrent requests that can be handled - ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 + ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_CI_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 # Set the concurrent job limit in the GitLab runner config file sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml @@ -111,7 +111,7 @@ if [[ "${1}" == "run" ]]; then # Construct the command to run the GitLab runner # nohup: Run the command immune to hangups # --working-directory: Directory where 
the runner will store its working files (from config.gaeac6) - COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_BUILDS_DIR}" + COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_CI_BUILDS_DIR}" # --user ${USER}" # This line is commented out in the original script # Print the command and log file location From 8b39060b9279a660bba9fe84fc51b3b09b15b798 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 25 Mar 2025 12:05:50 -0400 Subject: [PATCH 35/45] pared down comments in config.gaeac6 --- ci/platforms/config.gaeac6 | 48 +++++++++++++------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 83bc166aa52..3d1d75892d9 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -4,7 +4,6 @@ # config.gaeac6 - Platform-specific configuration for Gaea C6 # # This file contains environment variables used by CI/CD scripts, -# including the launch_gitlab_runner.sh script. It defines paths, # directories, and settings specific to the Gaea C6 platform. 
######################################################################### @@ -16,7 +15,7 @@ export GFS_CI_ROOT="/ncrc/proj/nggps_emc/${USER}/GFS_CI_CD" export ICSDIR_ROOT=/gpfs/f6/bil-fire8/world-shared/global/glopara/data/ICSDIR ######################################################################### -# Jenkins-specific configuration settings +# Jenkins configuration settings ######################################################################### # JENKINS launch directory for agent - Where Jenkins agents are launched from @@ -28,52 +27,37 @@ export JENKINS_WORK_DIR="${GFS_CI_ROOT}/Jenkins/workspace" # NOTE: JENKINS custom_workspace directory where CI jobs are run # /gpfs/f6/drsa-precip3/proj-shared/global/CI -# is defined in the Jenkinsfile - -######################################################################### -# Test and CI directories -######################################################################### - -# CTest functional test directories for pre-staged input data -# Contains data needed for functional tests -export STAGED_TESTS_DIR="${GFS_CI_ROOT}/STAGED_TESTS_DIR" - -# CI BASH test directories - Used for bash script testing -export GFS_BASH_CI_ROOT="${GFS_CI_ROOT}/GFS_BASH_CI" +# and is defined in $HOMEgfs/ci/Jenkinsfile + ######################################################################### # GitLab CI configuration # These variables are referenced directly by launch_gitlab_runner.sh ######################################################################### -# GitLab URL for the CI # Used in the 'register' step of launch_gitlab_runner.sh for --url parameter export GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed # Directory for GitLab builds -# Used in launch_gitlab_runner.sh for: -# 1. --builds-dir parameter during 'register' -# 2. 
--working-directory parameter during 'run' +# Used in launch_gitlab_runner.sh for location of builds export GITLAB_CI_BUILDS_DIR=/gpfs/f6/drsa-precip3/scratch/role.glopara/GFS_CI_ROOT/GITLAB/CI -# Directory for GitLab runner -# Used by launch_gitlab_runner.sh as the directory to: -# 1. Store the gitlab-runner binary -# 2. Execute the runner from -# 3. Write log files to +# Directory for GitLab runner used by launch_gitlab_runner.sh export GITLAB_RUNNER_DIR="${GFS_CI_ROOT}/GitLab/Runner" + ######################################################################### -# HPC and parallel execution settings +# CI CRON system configuration ######################################################################### +export GFS_BASH_CI_ROOT="${GFS_CI_ROOT}/GFS_BASH_CI" +export max_concurrent_cases=5 # number of concurrent cases that can run simultaneously +export max_concurrent_pr=4 # number of concurrent pull requests -# HPC account information - For job submission -export HPC_ACCOUNT=drsa-precip3 -# Maximum number of concurrent cases that can run simultaneously -# Helps prevent overloading the system -export max_concurrent_cases=5 +######################################################################### +# CTest functional test directories for pre-staged input data +######################################################################### +export STAGED_TESTS_DIR="${GFS_CI_ROOT}/STAGED_TESTS_DIR" -# Maximum number of concurrent pull requests -# Limits the number of PR builds running at the same time -export max_concurrent_pr=4 +# HPC account information - For job submission +export HPC_ACCOUNT=drsa-precip3 \ No newline at end of file From d13f2b6d6744576efe6ff2b1f1e3cfbb41d45c02 Mon Sep 17 00:00:00 2001 From: "Terry.McGuinness" Date: Tue, 25 Mar 2025 17:02:33 +0000 Subject: [PATCH 36/45] cleaned up comments in gitlab runner launch script --- ci/scripts/utils/launch_gitlab_runner.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git 
a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 32fa22dab6d..7fed5d4b40d 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -38,7 +38,6 @@ esac # Source the platform-specific configuration file # This file contains platform-specific variables such as GITLAB_URL, GITLAB_CI_BUILDS_DIR, # and GITLAB_RUNNER_DIR which are required for runner registration and execution -# See config.gaeac6 for details on these variables source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" # Change to the GitLab runner directory defined in the platform config @@ -49,7 +48,6 @@ DATE=$(date +%Y%m%d%M) || true GITLAB_LOG="launched_gitlab_runner-${DATE}.log" # Set the GitLab runner name - this name will appear in the GitLab UI GITLAB_RUNNER_NAME="RDHPCS Gaea C6" -# Remove any existing log file rm -f "${LOG}" # Log the registration details echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at ${DATE}" >> "${GITLAB_LOG}" @@ -90,10 +88,9 @@ if [[ "${1}" == "register" ]]; then # Register the GitLab runner with the following parameters: # -n: Run in non-interactive mode # -t: Registration token from GitLab - # --url: URL of the GitLab server (from config.gaeac6) + # --url: URL of the GitLab server (from config.MACHINE_ID) # --executor: Type of executor (shell in this case) - # --shell: Shell to use for job execution - # --builds-dir: Directory where builds will be stored (from config.gaeac6) + # --builds-dir: Directory where builds will be stored (from config.MACHINE_ID) # --custom_build_dir-enabled: Enable custom build directories # --request-concurrency: Number of concurrent requests that can be handled ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_CI_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 @@ -103,8 +100,7 @@ if [[ "${1}" == "register" ]]; then fi 
######################################################################### -# RUN argument handling -# Starts a GitLab runner in the background +# RUN: Starts a GitLab runner in the background ######################################################################### if [[ "${1}" == "run" ]]; then @@ -125,8 +121,7 @@ if [[ "${1}" == "run" ]]; then fi ######################################################################### -# UNREGISTER argument handling -# Removes a GitLab runner from the GitLab server +# UNREGISTER: Removes a GitLab runner from the GitLab server ######################################################################### if [[ "${1}" == "unregister" ]]; then From 5a535479ad75e878c9af52e7211e0dce7e0841a2 Mon Sep 17 00:00:00 2001 From: "emc.glopara" Date: Tue, 25 Mar 2025 19:19:25 +0000 Subject: [PATCH 37/45] fixed nohup command for gitlab runner launch script --- ci/scripts/utils/launch_gitlab_runner.sh | 30 +++++++++--------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 7fed5d4b40d..5652fec0f90 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -44,14 +44,9 @@ source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" cd "${GITLAB_RUNNER_DIR}" || exit 1 # Set the log file name with the current date and time -DATE=$(date +%Y%m%d%M) || true -GITLAB_LOG="launched_gitlab_runner-${DATE}.log" -# Set the GitLab runner name - this name will appear in the GitLab UI -GITLAB_RUNNER_NAME="RDHPCS Gaea C6" +DATE=$(date +%Y-%m%d%M) || true +GITLAB_LOG="${PWD}/launched_gitlab_runner-${DATE}.log" rm -f "${LOG}" -# Log the registration details -echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at ${DATE}" >> "${GITLAB_LOG}" -echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" ######################################################################### # GitLab Token Handling @@ -85,6
+80,9 @@ fi ######################################################################### if [[ "${1}" == "register" ]]; then + + echo "Registering GitLab Runner ${MACHINE_ID} on host ${host} at ${DATE}" >> "${GITLAB_LOG}" + echo "with runner name: ${GITLAB_RUNNER_NAME}" >> "${GITLAB_LOG}" # Register the GitLab runner with the following parameters: # -n: Run in non-interactive mode # -t: Registration token from GitLab @@ -97,6 +95,7 @@ if [[ "${1}" == "register" ]]; then # Set the concurrent job limit in the GitLab runner config file sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml + exit 0 fi ######################################################################### @@ -104,20 +103,13 @@ fi ######################################################################### if [[ "${1}" == "run" ]]; then - # Construct the command to run the GitLab runner - # nohup: Run the command immune to hangups - # --working-directory: Directory where the runner will store its working files (from config.gaeac6) + # --working-directory: Directory where the runner will store its working files (from config.$MACHINE_ID) COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_CI_BUILDS_DIR}" - # --user ${USER}" # This line is commented out in the original script - - # Print the command and log file location - echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${PWD}/${GITLAB_LOG}" - - # Run the command in the background and redirect output to the log file - nohup "${COMMAND}" >> "${GITLAB_LOG}" 2>&1 & - - # Display the current contents of the log file + echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${GITLAB_LOG}" + echo -e "Running gitlab-runner with the command:${COMMAND}" >& "${GITLAB_LOG}" + ${COMMAND} >> "${GITLAB_LOG}" 2>&1 & cat "${GITLAB_LOG}" + exit 0 fi ######################################################################### From a33d5a8590c00a44fb99e51358a4ed89fa9e934d Mon Sep 17 00:00:00 2001 From: Terry McGuinness 
Date: Tue, 25 Mar 2025 15:32:04 -0400 Subject: [PATCH 38/45] fix setting of working-directory where runner is launched --- ci/scripts/utils/launch_gitlab_runner.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index 5652fec0f90..d5137f0df2b 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -103,8 +103,9 @@ fi ######################################################################### if [[ "${1}" == "run" ]]; then - # --working-directory: Directory where the runner will store its working files (from config.$MACHINE_ID) - COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_CI_BUILDS_DIR}" + # --working-directory: Directory where the runner is launched and keeps its working files (from config.$MACHINE_ID) + # do not confuse this with GitLabs CI_BUILDS_DIR which is designate by GFS_CI_BUILDS_DIR and is where the builds are stored + COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_RUNNER_DIR}" echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${GITLAB_LOG}" echo -e "Running gitlab-runner with the command:${COMMAND}" >& "${GITLAB_LOG}" ${COMMAND} >> "${GITLAB_LOG}" 2>&1 & From c03ba5eb54ea7b7dec546344414ca94df7f396d3 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 25 Mar 2025 15:45:46 -0400 Subject: [PATCH 39/45] clean up pipeline scipt with some minor comments for the three main stages --- ci/.gitlab-ci.yml | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index f20392d1473..2cc74e36798 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -2,7 +2,8 @@ stages: - build - create_experiments - run_tests - + +# Global variables variables: BUILD_DATE: 'today' GIT_CLONE_PATH: '$CI_BUILDS_DIR/global-workflow-${BUILD_DATE}' @@ -12,11 +13,12 @@ variables: RUNNER_SCRIPT_TIMEOUT: 6h 
RUNNER_AFTER_SCRIPT_TIMEOUT: 6h + +# Build stage for the global workflow on compute nodes build: variables: GIT_STRATEGY: clone GIT_SUBMODULE_STRATEGY: recursive - stage: build script: - export CI_PROJECT_DIR=$HOMEGFS @@ -24,28 +26,23 @@ build: - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh - mkdir -p ${RUNTESTS_DIR} - # NEED skip_ci_on_hosts to work (skipping for now) - # - source ${HOMEGFS}/workflow/gw_setup.sh - # - cases_on_host=$(${HOMEGFS}/ci/scripts/utils/get_host_case_list.py $MACHINE) - # - echo "cases_on_host=$cases_on_host" >> variables.env + # TODO - Add more machines to the list and make + # the setup and run_tests stages 2D matrices parallel: matrix: - MACHINE: ["gaeac6"] - #artifacts: - # reports: - # dotenv: variables.env tags: - ${MACHINE} +# Create experiments stage from a fixed list of cases in $HOMEGFS/ci/cases/pr +# TODO: Find a way to dynamically generate the list of cases (maybe using Jinja2) setup_experiments: variables: GIT_STRATEGY: none stage: create_experiments script: - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName}.yaml - #rules: - # - if: '$cases_on_host =~ /$caseName/' parallel: matrix: - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] @@ -54,6 +51,8 @@ setup_experiments: dependencies: - build +# Running the list of experiments created in the previous stage +# using the run-check_ci.sh script from $HOMEgfs/ci/scripts directory run_tests: variables: GIT_STRATEGY: none @@ -62,8 +61,6 @@ run_tests: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - pslot=$(${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${CI_BUILDS_DIR} ${pslot} global-workflow-${BUILD_DATE} - #rules: - # - if: '$cases_on_host =~ /$caseName/' parallel: matrix: - caseName: ["C48_ATM", 
"C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] From 4caa443a329b6812cf1a056d22d464a904a1a721 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 25 Mar 2025 17:10:27 -0400 Subject: [PATCH 40/45] moved BUILD identifier TODAY as a dir level and put RUNTESTS under it with the global-workflow clone dir, aka HOMEgfs --- ci/.gitlab-ci.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 2cc74e36798..40cf7e4b7ff 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -5,10 +5,10 @@ stages: # Global variables variables: - BUILD_DATE: 'today' - GIT_CLONE_PATH: '$CI_BUILDS_DIR/global-workflow-${BUILD_DATE}' + BUILD: 'TODAY' + GIT_CLONE_PATH: '$CI_BUILDS_DIR/${BUILD}/global-workflow' HOMEGFS: $GIT_CLONE_PATH - RUNTESTS_DIR: $CI_BUILDS_DIR/RUNTESTS + RUNTESTS_DIR: $CI_BUILDS_DIR/${BUILD}/RUNTESTS GIT_DEPTH: 1 RUNNER_SCRIPT_TIMEOUT: 6h RUNNER_AFTER_SCRIPT_TIMEOUT: 6h @@ -21,7 +21,6 @@ build: GIT_SUBMODULE_STRATEGY: recursive stage: build script: - - export CI_PROJECT_DIR=$HOMEGFS - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - ci/scripts/utils/ci_utils_wrapper.sh build_compute - sorc/link_workflow.sh @@ -60,7 +59,7 @@ run_tests: script: - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - pslot=$(${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${CI_BUILDS_DIR} ${pslot} global-workflow-${BUILD_DATE} + - ${HOMEGFS}/ci/scripts/run-check_ci.sh ${CI_BUILDS_DIR}/${BUILD} ${pslot} global-workflow parallel: matrix: - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] From 71cefaa9c3cfd4ecc03b54cf2f4fba794dc1bf69 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Tue, 25 Mar 2025 17:38:13 -0400 
Subject: [PATCH 41/45] needed to export RUNTESTS top point to where to create EXPDIRs --- ci/.gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml index 40cf7e4b7ff..1882234bb87 100644 --- a/ci/.gitlab-ci.yml +++ b/ci/.gitlab-ci.yml @@ -41,6 +41,7 @@ setup_experiments: GIT_STRATEGY: none stage: create_experiments script: + - export RUNTESTS=${RUNTESTS_DIR} - ${HOMEGFS}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/ci/cases/pr/${caseName}.yaml parallel: matrix: From 1ce0901163046b951c6a62ec217b2e58fdeecdba Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Tue, 25 Mar 2025 18:08:59 -0400 Subject: [PATCH 42/45] Update config.gaeac6 end of file space --- ci/platforms/config.gaeac6 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 3d1d75892d9..5d6e5eeacd8 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -60,4 +60,4 @@ export max_concurrent_pr=4 # number of concurrent pull requests export STAGED_TESTS_DIR="${GFS_CI_ROOT}/STAGED_TESTS_DIR" # HPC account information - For job submission -export HPC_ACCOUNT=drsa-precip3 \ No newline at end of file +export HPC_ACCOUNT=drsa-precip3 From 5b5a0c1287dc04fb413a948a63b1c82e7425eb74 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 26 Mar 2025 11:38:49 -0400 Subject: [PATCH 43/45] moved GFS_CI_ROOT on Geae C6 to precip3 proj-shared --- ci/platforms/config.gaeac6 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/platforms/config.gaeac6 b/ci/platforms/config.gaeac6 index 5d6e5eeacd8..f011dc262a5 100644 --- a/ci/platforms/config.gaeac6 +++ b/ci/platforms/config.gaeac6 @@ -8,7 +8,7 @@ ######################################################################### # Main CI root directory - Base directory for all CI/CD operations -export GFS_CI_ROOT="/ncrc/proj/nggps_emc/${USER}/GFS_CI_CD" +export 
GFS_CI_ROOT="/gpfs/f6/drsa-precip3/proj-shared/${USER}/GFS_CI_CD" # ICSDIR root directory - Contains initial condition data # Used by create_experiment.py for setting up test cases @@ -61,3 +61,4 @@ export STAGED_TESTS_DIR="${GFS_CI_ROOT}/STAGED_TESTS_DIR" # HPC account information - For job submission export HPC_ACCOUNT=drsa-precip3 + From c0440ec49470f4e37d28450cba4267796d35cc8e Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Fri, 28 Mar 2025 16:46:23 -0400 Subject: [PATCH 44/45] Update ci/scripts/utils/launch_gitlab_runner.sh ok Co-authored-by: Rahul Mahajan --- ci/scripts/utils/launch_gitlab_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index d5137f0df2b..ff22d26e9ee 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -44,7 +44,7 @@ source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}" cd "${GITLAB_RUNNER_DIR}" || exit 1 # Set the log file name with the current date and time -DATE=$(date +%Y-%m%d%M) || true +DATE=$(date +%Y%m%d%M) || true GITLAB_LOG="${PWD}/launched_gitlab_runner-${DATE}.log" rm -f "${LOG}" From 3ca0f4087b90203fff3133d788f3b048154ec48f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Fri, 28 Mar 2025 16:47:18 -0400 Subject: [PATCH 45/45] Update ci/scripts/utils/launch_gitlab_runner.sh Co-authored-by: Rahul Mahajan --- ci/scripts/utils/launch_gitlab_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/utils/launch_gitlab_runner.sh b/ci/scripts/utils/launch_gitlab_runner.sh index ff22d26e9ee..653c4ccf771 100755 --- a/ci/scripts/utils/launch_gitlab_runner.sh +++ b/ci/scripts/utils/launch_gitlab_runner.sh @@ -46,7 +46,7 @@ cd "${GITLAB_RUNNER_DIR}" || exit 1 # Set the log file name with the current date and time DATE=$(date +%Y%m%d%M) || true GITLAB_LOG="${PWD}/launched_gitlab_runner-${DATE}.log" -rm -f "${LOG}" +rm -f 
"${GITLAB_LOG}" ######################################################################### # GitLab Token Handling