diff --git a/.github/workflows/ci_unit_tests.yaml b/.github/workflows/ci_unit_tests.yaml index 450d2546b8e..f5305dc7ca3 100644 --- a/.github/workflows/ci_unit_tests.yaml +++ b/.github/workflows/ci_unit_tests.yaml @@ -26,8 +26,7 @@ jobs: sudo apt-get install -y perl libxml-libxml-perl libxml-libxslt-perl libdatetime-perl python -m pip install --upgrade pip python -m pip install -r global-workflow/dev/workflow/requirements.txt - pip install pytest - pip install wget + pip install pytest pytest-cov pyyaml jinja2 wget - name: Cache Rocoto id: cache-rocoto @@ -60,7 +59,12 @@ jobs: cd global-workflow/sorc git submodule update --init -j 2 wxflow ufs_model.fd ./link_workflow.sh - cd ../dev/ci/scripts/tests + + # Create test data directory for unit tests + mkdir -p ../dev/ci/scripts/unittests/test_data + echo "Creating test directories and files for CI tests" + + cd ../dev/ci/scripts/unittests pytest -v --junitxml test-results.xml @@ -68,6 +72,6 @@ jobs: if: always() uses: EnricoMi/publish-unit-test-result-action@v2 with: - files: global-workflow/dev/ci/scripts/tests/test-results.xml + files: global-workflow/dev/ci/scripts/unittests/test-results.xml job_summary: true comment_mode: off diff --git a/.github/workflows/pw_aws_ci.yaml b/.github/workflows/pw_aws_ci.yaml index 490e52cb0fa..e7c05f3e296 100644 --- a/.github/workflows/pw_aws_ci.yaml +++ b/.github/workflows/pw_aws_ci.yaml @@ -157,7 +157,7 @@ jobs: - name: Run Experiment ${{ matrix.case }} run: | cd ${{ env.TEST_DIR }}/HOMEgfs - ./dev/ci/scripts/run-check_ci.sh ${{ env.TEST_DIR }} ${{ matrix.case }}.${{ github.run_id }} HOMEgfs + ./dev/ci/scripts/run_check_ci.sh ${{ env.TEST_DIR }} ${{ matrix.case }}.${{ github.run_id }} HOMEgfs clean-up: needs: run-experiments diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000000..a08bbd63ddb --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,80 @@ +# ========================================================================== +# Main GitLab CI Configuration for 
the global-workflow Project +# ========================================================================== +# +# This is the primary CI/CD configuration file that orchestrates the GitLab CI +# testing framework for the global-workflow project. It defines pipeline stages +# and includes specialized configuration files for different testing aspects. +# +# The pipeline architecture supports: +# 1. Multiple host environments (hera, gaeac6, etc.) +# 2. Two primary testing modes: +# - PR validation via standard test cases +# - CTest-based tests triggered via GitHub API +# +# Included files: +# - dev/ci/gitlab-ci-ctests.yml: Handles CMake/CTest-based testing framework +# - dev/ci/gitlab-ci-cases.yml: Defines templates for standard experiment cases +# - dev/ci/gitlab-ci-hosts.yml: Contains host-specific configurations and test matrices +# that can be extended to support additional hosts +# +# The host configuration in dev/ci/gitlab-ci-hosts.yml is designed to be easily +# extended with new computing platforms and allows per-host specification +# of which test cases to run. + +stages: + - build + - create_experiments + - run_tests + - finalize + +variables: + BUILD: '' # Placeholder for control over build directory path + # Using GW prefix to avoid confusion with GitLab predefined variables + GW_RUN_PATH: ${CI_BUILDS_DIR}/${BUILD}/${CI_COMMIT_SHORT_SHA} + # Overriding GitLab's predefined GIT_CLONE_PATH variable to specify the exact location + # where the repository should be cloned. This works with custom_build_dir-enabled in the runner config.
+ GIT_CLONE_PATH: '${GW_RUN_PATH}/global-workflow' + GW_HOMEgfs: ${GIT_CLONE_PATH} + RUNTESTS_DIR: ${GW_RUN_PATH}/RUNTESTS + GIT_DEPTH: 10 + RUNNER_SCRIPT_TIMEOUT: 6h + RUNNER_AFTER_SCRIPT_TIMEOUT: 6h + # Defaults only (trigger/API variables of the same name override them): CTests (true) or PR cases (false) + GITHUB_API_TRIGGER: "false" + PR_NUMBER: "0" + +# Include specialized pipeline configuration files +include: + - local: 'dev/ci/gitlab-ci-ctests.yml' # CTest framework configuration + - local: 'dev/ci/gitlab-ci-cases.yml' # Standard test case templates + - local: 'dev/ci/gitlab-ci-hosts.yml' # Host-specific configurations + +.base_config: + variables: + GIT_STRATEGY: none + +# Common build template for all modalities +.build_template: + variables: + GIT_STRATEGY: clone + GIT_SUBMODULE_STRATEGY: recursive + GIT_SSL_NO_VERIFY: "true" # Address potential certificate verification issues + stage: build + script: + - | + set -e # Fail the job if any command fails + echo "Setting up build environment for ${machine}" + echo "Using build directory ${GW_HOMEgfs}" + git submodule status + + build_status=0 # Captured via '||' below so set -e does not exit before the failure is reported + dev/ci/scripts/utils/ci_utils.sh build || build_status=$?
+ + if [ $build_status -ne 0 ]; then + echo "Build failed with exit code $build_status" + exit $build_status + fi + + sorc/link_workflow.sh + mkdir -p ${RUNTESTS_DIR} diff --git a/CMakeLists.txt b/CMakeLists.txt index 5044689f7e2..6470de8ca7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,4 +24,4 @@ endif() # add_subdirectory(sorc) # Setup tests -add_subdirectory(ctests) +add_subdirectory(dev/ctests) diff --git a/dev/ci/.gitlab-ci.yml b/dev/ci/.gitlab-ci.yml deleted file mode 100644 index 810334c0ace..00000000000 --- a/dev/ci/.gitlab-ci.yml +++ /dev/null @@ -1,69 +0,0 @@ -stages: - - build - - create_experiments - - run_tests - -# Global variables -variables: - BUILD: 'TODAY' - GIT_CLONE_PATH: '${CI_BUILDS_DIR}/${BUILD}/global-workflow' - HOMEGFS: ${GIT_CLONE_PATH} - RUNTESTS_DIR: ${CI_BUILDS_DIR}/${BUILD}/RUNTESTS - GIT_DEPTH: 1 - RUNNER_SCRIPT_TIMEOUT: 6h - RUNNER_AFTER_SCRIPT_TIMEOUT: 6h - - -# Build stage for the global workflow on compute nodes -build: - variables: - GIT_STRATEGY: clone - GIT_SUBMODULE_STRATEGY: recursive - stage: build - script: - - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - - dev/ci/scripts/utils/ci_utils_wrapper.sh build_compute - - sorc/link_workflow.sh - - mkdir -p ${RUNTESTS_DIR} - # TODO - Add more machines to the list and make (next PR) - #parallel: - # matrix: - # - MACHINE: ["gaeac6"] - tags: - - gaeac6 - - -# Create experiments stage from a fixed list of cases in $HOMEGFS/dev/ci/cases/pr -# TODO: Next PR has simi-dynamic caseName lists -setup_experiments: - variables: - GIT_STRATEGY: none - stage: create_experiments - script: - - export RUNTESTS=${RUNTESTS_DIR} - - ${HOMEGFS}/dev/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEGFS}/dev/ci/cases/pr/${caseName}.yaml - parallel: - matrix: - - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] - tags: - - gaeac6 - dependencies: - - build 
- -# Running the list of experiments created in the previous stage -# using the run-check_ci.sh script from $HOMEgfs/dev/ci/scripts directory -run_tests: - variables: - GIT_STRATEGY: none - stage: run_tests - script: - - echo "Using build directory ${HOMEGFS} (dated ${BUILD_DATE})" - - pslot=$(${HOMEGFS}/dev/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS_DIR} ${caseName}) - - ${HOMEGFS}/dev/ci/scripts/run-check_ci.sh ${CI_BUILDS_DIR}/${BUILD} ${pslot} global-workflow - parallel: - matrix: - - caseName: ["C48_ATM", "C48mx500_3DVarAOWCDA", "C48mx500_hybAOWCDA", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar", "C96C48_hybatmDA", "C96C48_hybatmaerosnowDA"] - tags: - - gaeac6 - dependencies: - - setup_experiments diff --git a/dev/ci/Jenkinsfile b/dev/ci/Jenkinsfile index 3d31079e110..5bd5f555f02 100644 --- a/dev/ci/Jenkinsfile +++ b/dev/ci/Jenkinsfile @@ -129,7 +129,7 @@ pipeline { def error_logs_message = "" dir("${HOMEgfs}/sorc") { try { - sh(script: "${HOMEgfs_dev}/ci/scripts/utils/ci_utils_wrapper.sh build_compute") // build the global-workflow executables + sh(script: "${HOMEgfs_dev}/ci/scripts/utils/ci_utils.sh build") // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { @@ -203,7 +203,7 @@ pipeline { try { error_output = sh(script: """ source ${HOMEgfs_dev}/ush/gw_setup.sh - ${HOMEgfs_dev}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs_dev}/ci/cases/pr/${caseName}.yaml + ${HOMEgfs_dev}/ci/scripts/utils/ci_utils.sh create_experiment ${HOMEgfs_dev}/ci/cases/pr/${caseName}.yaml """, returnStdout: true).trim() } catch (Exception error_create) { sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body '${caseName} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n```\n${error_output}```' """) @@ -215,22 +215,22 @@ pipeline { stage("Running ${caseName}") { 
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { - def pslot = sh(script: "${HOMEgfs_dev}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() + def pslot = sh(script: "${HOMEgfs_dev}/ci/scripts/utils/ci_utils.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { sh(script: """ source ${HOMEgfs_dev}/ush/gw_setup.sh - ${HOMEgfs_dev}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' + ${HOMEgfs_dev}/ci/scripts/run_check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' """) sh(script: """ source ${HOMEgfs_dev}/ush/gw_setup.sh - ${HOMEgfs_dev}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot} + ${HOMEgfs_dev}/ci/scripts/utils/ci_utils.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot} """) } catch (Exception error_experment) { sh(script: """ source ${HOMEgfs_dev}/ush/gw_setup.sh - ${HOMEgfs_dev}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot} + ${HOMEgfs_dev}/ci/scripts/utils/ci_utils.sh cancel_batch_jobs ${pslot} """) ws(CUSTOM_WORKSPACE) { def error_logs = "" @@ -300,9 +300,9 @@ pipeline { done """, returnStatus: true) sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - if (fileExists("${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log")) { - sh(script: """echo "**CI ${STATUS}** on ${Machine} in Build# ${env.BUILD_NUMBER}
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body-file ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log """, returnStatus: true) + if (fileExists("${CUSTOM_WORKSPACE}/RUNTESTS/run_check_ci.log")) { + sh(script: """echo "**CI ${STATUS}** on ${Machine} in Build# ${env.BUILD_NUMBER}
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/run_check_ci.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/run_check_ci.log""", returnStatus: true) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body-file ${CUSTOM_WORKSPACE}/RUNTESTS/run_check_ci.log """, returnStatus: true) } if (STATUS == 'Passed') { try { diff --git a/dev/ci/Jenkinsfile4AWS b/dev/ci/Jenkinsfile4AWS index 899558087cb..d5b31065f24 100644 --- a/dev/ci/Jenkinsfile4AWS +++ b/dev/ci/Jenkinsfile4AWS @@ -134,7 +134,7 @@ pipeline { def error_logs_message = "" dir("${HOMEgfs}/sorc") { try { - // sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh build_compute") // build the global-workflow executables + // sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils.sh build") // build the global-workflow executables sh(script: './build_compute.sh -A ${USER} gfs gefs sfs') // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" @@ -205,10 +205,10 @@ pipeline { script { env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" try { - error_output = sh(script: """ + error_output = sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml - """, returnStdout: true).trim() + ${HOMEgfs}/ci/scripts/utils/ci_utils.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml + """, returnStdout: true).trim() } catch (Exception error_create) { sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) error("Case ${caseName} failed to create experiment directory") @@ -219,22 +219,22 @@ pipeline { stage("Running ${caseName}") { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script
{ - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' + ${HOMEgfs}/ci/scripts/run_check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow' """) sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot} + ${HOMEgfs}/ci/scripts/utils/ci_utils.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot} """) } catch (Exception error_experment) { sh(script: """ source ${HOMEgfs}/workflow/gw_setup.sh - ${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot} + ${HOMEgfs}/ci/scripts/utils/ci_utils.sh cancel_batch_jobs ${pslot} """) ws(CUSTOM_WORKSPACE) { def error_logs = "" diff --git a/dev/ci/gitlab-ci-cases.yml b/dev/ci/gitlab-ci-cases.yml new file mode 100644 index 00000000000..773960871f4 --- /dev/null +++ b/dev/ci/gitlab-ci-cases.yml @@ -0,0 +1,42 @@ +# ========================================================================== +# Templates for Standard Test Cases +# ========================================================================== +# +# This file defines templates for setting up and running standard experiment +# test cases for the global-workflow project. These cases represent more +# comprehensive, end-to-end testing scenarios than the CTesting framework. +# +# These templates are used primarily for PR testing when GITHUB_API_TRIGGER=false. +# The actual test case matrix for each host is defined in .gitlab-ci-hosts.yml. 
+# +# Key templates: +# - .setup_template: Creates experiment directories based on case YAML files +# - .run_tests_template: Executes and validates the experiment workflows +# +# Test cases are defined as YAML files in the dev/ci/cases/pr directory and +# are referenced by name in the host configuration file's test matrix. + +# Template for experiment setup jobs +.setup_template: + extends: .base_config + stage: create_experiments + script: + - | + export RUNTESTS=${RUNTESTS_DIR} + ${GW_HOMEgfs}/dev/ci/scripts/utils/ci_utils.sh create_experiment ${GW_HOMEgfs}/dev/ci/cases/pr/${caseName}.yaml + exit $? + needs: + - build-${machine} + +# Template for test execution jobs +.run_tests_template: + extends: .base_config + stage: run_tests + script: + - | + echo "Using build directory $GW_HOMEgfs (dated $BUILD_DATE)" + pslot=$(${GW_HOMEgfs}/dev/ci/scripts/utils/ci_utils.sh get_pslot ${RUNTESTS_DIR} ${caseName}) + ${GW_HOMEgfs}/dev/ci/scripts/run_check_ci.sh ${GW_RUN_PATH} ${pslot} global-workflow + exit $? + needs: + - setup_experiments-${machine} diff --git a/dev/ci/gitlab-ci-ctests.yml b/dev/ci/gitlab-ci-ctests.yml new file mode 100644 index 00000000000..38a17e3cfab --- /dev/null +++ b/dev/ci/gitlab-ci-ctests.yml @@ -0,0 +1,53 @@ +# ========================================================================== +# Templates and Configuration for CTest-Based Testing +# ========================================================================== +# +# This file defines jobs and templates for running CMake/CTest-based functional +# tests in the global-workflow project. The CTests are designed to +# test specific Rocoto Jobs singularly with predefined input data. +# +# CTests are primarily triggered via GitHub API for quick validation of PRs, +# whereas the full experiment cases are used for more extensive testing.
+# +# Key components: +# - .create_ctests: Template for setting up the CMake/CTest environment +# - .run_ctests_template: Template for executing specific CTest labels +# +# CTests are defined in the dev/ctests directory and are categorized by labels, +# which are referenced in the hosts file to determine which tests to run on +# which platforms. + +# ======================================= +# Templates for CTests +# ======================================= + +# Setup job for ctests using CMake +.create_ctests: + extends: .base_config + stage: create_experiments + script: | + set -e # Fail the job if any command fails + echo "Setting up for ctests workflow" + source ${GW_HOMEgfs}/dev/ci/platforms/config.${machine} + cd ${GW_HOMEgfs}/dev/ctests + mkdir -p build + cd build + cmake -S ${GW_HOMEgfs} + ctest -N + num=$(ctest -N | grep "Total Tests" | awk "{print \$3}") + [ "$num" -gt 0 ] || { echo "No tests found"; exit 1; } + echo "CTests have been created and configured successfully" + needs: + - build-${machine} + +# Main template for CTest execution jobs +.run_ctests_template: + extends: .base_config + stage: run_tests + script: | + set -e # Fail the job if any command fails + echo "Running ${CTEST} tests in ${GW_HOMEgfs}/dev/ctests" + cd ${GW_HOMEgfs}/dev/ctests/build + ctest -L ${CTEST} --output-on-failure + needs: + - create_ctests-${machine} diff --git a/dev/ci/gitlab-ci-hosts.yml b/dev/ci/gitlab-ci-hosts.yml new file mode 100644 index 00000000000..dc4f23c4d57 --- /dev/null +++ b/dev/ci/gitlab-ci-hosts.yml @@ -0,0 +1,154 @@ +# ========================================================================== +# Host-Specific Configurations for the global-workflow CI Pipeline +# ========================================================================== +# +# This file defines host-specific job configurations for different computing +# platforms supported by the global-workflow testing pipeline. It's designed +# to be easily extendable with new computing platforms. 
+# +# Key features: +# - Per-host test case matrices that define which tests run on which hosts +# - Includes two sections supporting the standard PR Cases and CTests +# - Host-specific tags and variables for job routing to the correct runners +# - Conditional rules based on trigger type (PR vs API) +# +# The GITHUB_API_TRIGGER variable (set in the main .gitlab-ci.yml) controls +# whether to run CTests (true) or standard cases (false). + +# ======================================= +# Standard Cases configurations by host +# ======================================= + +# Host: Hera - Standard Cases +setup_experiments-hera: + extends: .setup_template + variables: + machine: hera + tags: + - hera + parallel: + matrix: + - caseName: ["C48mx500_hybAOWCDA", "C96C48_hybatmDA", "C48_S2SWA_gefs", "C96C48_hybatmaerosnowDA", "C48_S2SW", "C96_atm3DVar", "C48_ATM", "C96mx100_S2S", "C48mx500_3DVarAOWCDA"] + needs: + - build-hera + rules: + - if: $GITHUB_API_TRIGGER != "true" + +run_tests-hera: + extends: .run_tests_template + variables: + machine: hera + tags: + - hera + parallel: + matrix: + - caseName: ["C48mx500_hybAOWCDA", "C96C48_hybatmDA", "C48_S2SWA_gefs", "C96C48_hybatmaerosnowDA", "C48_S2SW", "C96_atm3DVar", "C48_ATM", "C96mx100_S2S", "C48mx500_3DVarAOWCDA"] + needs: + - setup_experiments-hera + rules: + - if: $GITHUB_API_TRIGGER != "true" + +# Host: GAEAC6 - Standard Cases +setup_experiments-gaeac6: + extends: .setup_template + variables: + machine: gaeac6 + tags: + - gaeac6 + parallel: + matrix: + - caseName: ["C48mx500_hybAOWCDA", "C96C48_hybatmDA", "C48_S2SWA_gefs", "C96C48_hybatmaerosnowDA", "C48_S2SW", "C96_atm3DVar", "C48_ATM", "C48mx500_3DVarAOWCDA"] + needs: + - build-gaeac6 + rules: + - if: $GITHUB_API_TRIGGER != "true" + +run_tests-gaeac6: + extends: .run_tests_template + variables: + machine: gaeac6 + tags: + - gaeac6 + parallel: + matrix: + - caseName: ["C48mx500_hybAOWCDA", "C96C48_hybatmDA", "C48_S2SWA_gefs", "C96C48_hybatmaerosnowDA", "C48_S2SW", 
"C96_atm3DVar", "C48_ATM", "C48mx500_3DVarAOWCDA"] + needs: + - setup_experiments-gaeac6 + rules: + - if: $GITHUB_API_TRIGGER != "true" + +# ======================================= +# CTests configurations by host +# ======================================= + +# Template for CTest jobs that will be used across machines +.ctests_cases_template: + extends: .run_ctests_template + stage: run_tests + parallel: + matrix: + - CTEST: ['C48_ATM_gfs_fcst_seg0', 'C48_S2SW_gfs_fcst_seg0', 'C48_S2SW_gfs_atmos_prod_f000-f003'] + +# Host-specific CTest setup jobs +create_ctests-hera: + extends: .create_ctests + stage: create_experiments + tags: + - hera + variables: + machine: hera + needs: + - build-hera + rules: + - if: $GITHUB_API_TRIGGER == "true" + +create_ctests-gaeac6: + extends: .create_ctests + stage: create_experiments + tags: + - gaeac6 + variables: + machine: gaeac6 + needs: + - build-gaeac6 + rules: + - if: $GITHUB_API_TRIGGER == "true" + +# Host: Hera - CTests +run_ctests-hera: + extends: .ctests_cases_template + tags: + - hera + needs: + - create_ctests-hera + rules: + - if: $GITHUB_API_TRIGGER == "true" + +# Host: GAEAC6 - CTests +run_ctests-gaeac6: + extends: .ctests_cases_template + tags: + - gaeac6 + needs: + - create_ctests-gaeac6 + rules: + - if: $GITHUB_API_TRIGGER == "true" + +# ======================================= +# Common build configurations by host +# These will always be included, regardless of modality +# ======================================= + +build-hera: + extends: .build_template + variables: + machine: hera + tags: + - hera + +build-gaeac6: + extends: .build_template + variables: + machine: gaeac6 + tags: + - gaeac6 diff --git a/dev/ci/platforms/config.gaeac6 b/dev/ci/platforms/config.gaeac6 index f32c7e5627e..08883e36782 100644 --- a/dev/ci/platforms/config.gaeac6 +++ b/dev/ci/platforms/config.gaeac6 @@ -41,7 +41,7 @@ export GITLAB_RUNNER_NAME="RDHPCS Gaea C6" # Directory for GitLab builds # Used in launch_gitlab_runner.sh for location of 
builds -export GITLAB_CI_BUILDS_DIR=/gpfs/f6/drsa-precip3/world-shared/global/CI/GITLAB +export GITLAB_BUILDS_DIR=/gpfs/f6/drsa-precip3/world-shared/global/CI/GITLAB # Directory for GitLab runner used by launch_gitlab_runner.sh export GITLAB_RUNNER_DIR="${GFS_CI_ROOT}/GitLab/Runner" diff --git a/dev/ci/platforms/config.hera b/dev/ci/platforms/config.hera index 1b589cf943d..c457f875b15 100644 --- a/dev/ci/platforms/config.hera +++ b/dev/ci/platforms/config.hera @@ -41,19 +41,22 @@ export GITLAB_URL=https://vlab.noaa.gov/gitlab-licensed export GITLAB_RUNNER_NAME="RDHPCS Hera" # Directory for GitLab builds -# Used in launch_gitlab_runner.sh for location of builds -export GITLAB_CI_BUILDS_DIR=/scratch1/NCEPDEV/global/glopara/GFS_CI_CD/CI_WORKSPACES/CI_GITLAB +# Used in launch_gitlab_runner.sh for the --builds-dir parameter +# This works with --custom_build_dir-enabled=true to allow GIT_CLONE_PATH override +# from .gitlab-ci.yml to specify exact clone locations within this directory +export GITLAB_BUILDS_DIR=${GFS_CI_ROOT}/BUILDS/GITLAB # Directory for GitLab runner used by launch_gitlab_runner.sh +# This is where runner state/config files are stored (--working-directory parameter) export GITLAB_RUNNER_DIR="${GFS_CI_ROOT}/GitLab/Runner" # CTest functional test directories for pre stagged input data -export STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR +export CTESTS_STAGED_TESTS_DIR=${GFS_CI_ROOT}/STAGED_TESTS_DIR ######################################################################### # CI CRON system configuration ######################################################################### -export GFS_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI +export GW_BASH_CI_ROOT=${GFS_CI_ROOT}/GFS_BASH_CI export max_concurrent_cases=5 export max_concurrent_pr=4 diff --git a/dev/ci/scripts/check_ci.sh b/dev/ci/scripts/check_ci.sh deleted file mode 100755 index b1a8fa05d9b..00000000000 --- a/dev/ci/scripts/check_ci.sh +++ /dev/null @@ -1,180 +0,0 @@ -#!/bin/bash -set -eux 
-##################################################################################### -# -# Script description: BASH script for checking for cases in a given PR and -# running rocotostat on each to determine if the experiment has -# succeeded or faild. This script is intended -# to run from within a cron job in the CI Managers account -##################################################################################### - -HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" -scriptname=$(basename "${BASH_SOURCE[0]}") -echo "Begin ${scriptname} at $(date -u)" || true -export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' - -REPO_URL=${REPO_URL:-"git@github.com:NOAA-EMC/global-workflow.git"} - -######################################################################### -# Set up runtime environment varibles for accounts on supproted machines -######################################################################### - -source "${HOMEgfs}/ush/detect_machine.sh" -case ${MACHINE_ID} in - hera | orion | hercules | wcoss2 | gaea) - echo "Running Automated Testing on ${MACHINE_ID}" - source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}" - ;; - *) - echo "Unsupported platform. Exiting with error." - exit 1 - ;; -esac -set +x -export HOMEgfs -source "${HOMEgfs}/ush/module-setup.sh" -source "${HOMEgfs}/ci/scripts/utils/ci_utils.sh" -module use "${HOMEgfs}/modulefiles" -module load "module_gwsetup.${MACHINE_ID}" -module list -# Load machine specific modules for ci (only wcoss2 is current) -if [[ "${MACHINE_ID}" == "wcoss2" ]]; then - module load "module_gwci.${MACHINE_ID}" -fi -set -x -if ! 
command -v gh > /dev/null; then - GH="${HOME}/bin/gh" -else - GH=$(command -v gh) -fi -export GH - -rocotostat=$(command -v rocotostat) -if [[ -z ${rocotostat} ]]; then - echo "rocotostat not found on system" - exit 1 -else - echo "rocotostat being used from ${rocotostat}" -fi -rocotocheck=$(command -v rocotocheck) -if [[ -z ${rocotocheck} ]]; then - echo "rocotocheck not found on system" - exit 1 -else - echo "rocotocheck being used from ${rocotocheck}" -fi - -pr_list_dbfile="${GFS_BASH_CI_ROOT}/open_pr_list.db" - -pr_list="" -if [[ -f "${pr_list_dbfile}" ]]; then - pr_list=$("${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --list Open Running) || true -fi -if [[ -z "${pr_list}" ]]; then - echo "no PRs open and ready to run cases on .. exiting" - exit 0 -fi - -############################################################# -# Loop throu all PRs in PR List and look for expirments in -# the RUNTESTS dir and for each one run runcotorun on them -############################################################# - -for pr in ${pr_list}; do - id=$("${GH}" pr view "${pr}" --repo "${REPO_URL}" --json id --jq '.id') - output_ci="${GFS_BASH_CI_ROOT}/PR/${pr}/output_runtime_${id}" - output_ci_single="${GFS_BASH_CI_ROOT}/PR/${pr}/output_runtime_single.log" - echo "Processing Pull Request #${pr} and looking for cases" - pr_dir="${GFS_BASH_CI_ROOT}/PR/${pr}" - - # If there is no RUNTESTS dir for this PR then cases have not been made yet - if [[ ! 
-d "${pr_dir}/RUNTESTS" ]]; then - continue - fi - - #Check for PR success when ${pr_dir}/RUNTESTS/EXPDIR is void of subfolders - # since all successfull ones where previously removed - # shellcheck disable=SC2312 - if [[ -z $(ls -A "${pr_dir}/RUNTESTS/EXPDIR") ]] ; then - "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Passed" - sed -i "1 i\`\`\`" "${output_ci}" - sed -i "1 i\All CI Test Cases Passed on ${MACHINE_ID^}:" "${output_ci}" - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}" - "${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - # Check to see if this PR that was opened by the weekly tests and if so close it if it passed on all platforms - weekly_labels=$(${GH} pr view "${pr}" --repo "${REPO_URL}" --json headRefName,labels,author --jq 'select(.author.login | contains("emcbot")) | select(.headRefName | contains("weekly_ci")) | .labels[].name ') || true - if [[ -n "${weekly_labels}" ]]; then - num_platforms=$(find "${HOMEgfs}/ci/platforms" -type f -name "config.*" | wc -l) - passed=0 - for platforms in "${HOMEgfs}"/ci/platforms/config.*; do - machine=$(basename "${platforms}" | cut -d. -f2) - if [[ "${weekly_labels}" == *"CI-${machine^}-Passed"* ]]; then - ((passed=passed+1)) - fi - done - if [[ "${passed}" == "${num_platforms}" ]]; then - "${GH}" pr close --repo "${REPO_URL}" "${pr}" - fi - fi - # Completely remove the PR and its cloned repo on sucess - # of all cases on this platform - rm -Rf "${pr_dir}" - continue - fi - - for pslot_dir in "${pr_dir}/RUNTESTS/EXPDIR/"*; do - pslot=$(basename "${pslot_dir}") || true - if [[ -z "${pslot}" ]]; then - echo "No experiments found in ${pslot_dir} .. exiting" - exit 0 - fi - xml="${pslot_dir}/${pslot}.xml" - db="${pslot_dir}/${pslot}.db" - if [[ ! 
-f "${db}" ]]; then - continue - fi - - set +e - rocoto_state="$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}")" - rocoto_error=$? - rm -f "${output_ci_single}" - if [[ "${rocoto_error}" -ne 0 ]]; then - "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed" - if [[ "${rocoto_state}" == "STALLED" ]]; then - # shellcheck disable=SC2312 - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body "Experiment ${pslot} **${rocoto_state}** on ${MACHINE_ID^} at $(date +'%D %r')" - "${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - cancel_all_batch_jobs "${pr_dir}/RUNTESTS" - exit "${rocoto_error}" - fi - error_logs=$("${rocotostat}" -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs "${rocotocheck}" -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true - # shellcheck disable=SC2086 - ${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo "PR_${pr}" > /dev/null - # shellcheck disable=SC2086 - gist_url="$("${HOMEgfs}/ci/scripts/utils/publish_logs.py" --file ${error_logs} --gist "PR_${pr}")" - { - echo "Experiment ${pslot} **${rocoto_state}** on ${MACHINE_ID^} at $(date +'%D %r')" || true - echo "" - echo "Error logs:" - echo "\`\`\`" - echo "${error_logs}" - echo "\`\`\`" - echo "Follow link here to view the contents of the above file(s): [(link)](${gist_url})" - } >> "${output_ci_single}" - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" - "${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - cancel_all_batch_jobs "${pr_dir}/RUNTESTS" - exit "${rocoto_error}" - fi - if [[ "${rocoto_state}" == "DONE" ]]; then - #Remove Experment cases that completed successfully - "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh" cleanup_experiment "${pslot_dir}" - rm -f "${output_ci_single}" - # echo "\`\`\`" > 
"${output_ci_single}" - DATE=$(date +'%D %r') - echo "Experiment ${pslot} **SUCCESS** on ${MACHINE_ID^} at ${DATE}" >> "${output_ci_single}" - echo "Experiment ${pslot} *** SUCCESS *** at ${DATE}" >> "${output_ci}" - # "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" - fi - done -done diff --git a/dev/ci/scripts/clone-build_ci.sh b/dev/ci/scripts/clone-build_ci.sh deleted file mode 100755 index 3cef7fc230b..00000000000 --- a/dev/ci/scripts/clone-build_ci.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash -set -eux - -##################################################################### -# Usage and arguments for specfifying cloned directgory -##################################################################### -usage() { - set +x - echo - echo "Usage: $0 -p -d -o -h" - echo - echo " -p PR number to clone and build" - echo " -d Full path of of where to clone and build PR" - echo " -o Full path to output message file detailing results of CI tests" - echo " -h display this message and quit" - echo - exit 1 -} - -################################################################ -while getopts "p:d:o:h" opt; do - case ${opt} in - p) - PR=${OPTARG} - ;; - d) - repodir=${OPTARG} - ;; - o) - outfile=${OPTARG} - ;; - h|\?|:) - usage - ;; - *) - echo "Unrecognized option" - usage - ;; - esac -done - -cd "${repodir}" || exit 1 -if [[ -d global-workflow ]]; then - rm -Rf global-workflow -fi - -git clone "${REPO_URL}" -cd global-workflow || exit 1 - -# checkout pull request -"${GH}" pr checkout "${PR}" --repo "${REPO_URL}" --recurse-submodules -HOMEgfs="${PWD}" -source "${HOMEgfs}/ush/detect_machine.sh" - -#################################################################### -# start output file -{ - echo "Automated global-workflow Testing Results:" - echo '```' - echo "Machine: ${MACHINE_ID^}" - echo "Start: $(date) on $(hostname)" || true - echo "---------------------------------------------------" -} >> "${outfile}" 
-###################################################################### - -# get commit hash -commit=$(git log --pretty=format:'%h' -n 1) -echo "${commit}" > "../commit" - -# build full cycle -cd sorc || exit 1 -set +e - -source "${HOMEgfs}/ush/module-setup.sh" -export BUILD_JOBS=8 -rm -rf log.build -./build_all.sh -guk >> log.build 2>&1 -build_status=$? - -DATE=$(date +'%D %r') -if [[ ${build_status} != 0 ]]; then - { - echo "Build: *** FAILED ***" - echo "Build: Failed at ${DATE}" - cat "${PWD}/log.build" - cat "${PWD}/logs/error.logs" - } >> "${outfile}" - exit "${build_status}" -else - { - echo "Build: Completed at ${DATE}" - } >> "${outfile}" -fi - -LINK_LOGFILE_PATH=link_workflow.log -rm -f "${LINK_LOGFILE_PATH}" -./link_workflow.sh >> "${LINK_LOGFILE_PATH}" 2>&1 -link_status=$? -if [[ ${link_status} != 0 ]]; then - DATE=$(date +'%D %r') - { - echo "Link: *** FAILED ***" - echo "Link: Failed at ${DATE}" - cat "${LINK_LOGFILE_PATH}" - } >> "${outfile}" - exit "${link_status}" -fi - -echo "check/build/link test completed" -exit "${build_status}" diff --git a/dev/ci/scripts/driver.sh b/dev/ci/scripts/driver.sh deleted file mode 100755 index fd1d1a66c7b..00000000000 --- a/dev/ci/scripts/driver.sh +++ /dev/null @@ -1,285 +0,0 @@ -#!/bin/bash -set -eux - -##################################################################################### -# -# Script description: Top level driver script for checking PR -# ready for CI regression testing -# -# Abstract: -# -# This script uses GitHub CLI to check for Pull Requests with CI-Ready-${machine} tags on the -# development branch for the global-workflow repo. It then stages tests directories per -# PR number and calls clone-build_ci.sh to perform a clone and full build from the PR. 
-# It then is ready to run a suite of regression tests with various configurations -####################################################################################### - -export REPO_URL=${REPO_URL:-"git@github.com:NOAA-EMC/global-workflow.git"} - -################################################################ -# Setup the reletive paths to scripts and PS4 for better logging -################################################################ -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" -scriptname=$(basename "${BASH_SOURCE[0]}") -echo "Begin ${scriptname} at $(date +'%D %r')" || true -export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' - -######################################################################### -# Set up runtime environment varibles for accounts on supproted machines -######################################################################### - -source "${ROOT_DIR}/ush/detect_machine.sh" -case ${MACHINE_ID} in - hera | orion | hercules | wcoss2 | gaea) - echo "Running Automated Testing on ${MACHINE_ID}" - source "${ROOT_DIR}/dev/ci/platforms/config.${MACHINE_ID}" - ;; - *) - echo "Unsupported platform. Exiting with error." - exit 1 - ;; -esac - -###################################################### -# setup runtime env for correct python install and git -###################################################### -HOMEgfs=${ROOT_DIR} -export HOMEgfs -set +x -source "${ROOT_DIR}/dev/ci/scripts/utils/ci_utils.sh" -source "${ROOT_DIR}/ush/module-setup.sh" -module use "${ROOT_DIR}/modulefiles" -module load "module_gwsetup.${MACHINE_ID}" -# Load machine specific modules for ci (only wcoss2 is current) -if [[ "${MACHINE_ID}" == "wcoss2" ]]; then - module load "module_gwci.${MACHINE_ID}" -fi -set -x -unset HOMEgfs -if ! 
command -v gh > /dev/null; then - GH="${HOME}/bin/gh" -else - GH=$(command -v gh) -fi -export GH - -############################################################ -# query repo and get list of open PRs with tags {machine}-CI -############################################################ - -pr_list_dbfile="${GFS_BASH_CI_ROOT}/open_pr_list.db" - -if [[ ! -f "${pr_list_dbfile}" ]]; then - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --create --dbfile "${pr_list_dbfile}" -fi - -pr_list=$(${GH} pr list --repo "${REPO_URL}" --label "CI-${MACHINE_ID^}-Ready" --state "open" | awk '{print $1}') || true - -for pr in ${pr_list}; do - pr_dir="${GFS_BASH_CI_ROOT}/PR/${pr}" - [[ ! -d ${pr_dir} ]] && mkdir -p "${pr_dir}" - db_list=$("${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --add_pr "${pr}" --dbfile "${pr_list_dbfile}") - output_ci_single="${pr_dir}/output_single.log" - ############################################################# - # Check if a Ready labeled PR has changed back from once set - # and in that case completely kill the previose driver.sh cron - # job and all its decedands as well as removing all previous - # jobs in scheduler and associated files in the PR - ############################################################# - if [[ "${db_list}" == *"already is in list"* ]]; then - # Get the the PID and HOST of the driver.sh cron job - # that is stored int he CI database for this PR - driver_ID=$("${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --display "${pr}" | awk '{print $4}') || true - driver_PID=$(echo "${driver_ID}" | cut -d":" -f1) || true - driver_HOST=$(echo "${driver_ID}" | cut -d":" -f2) || true - host_name=$(hostname -s) - rm -f "${output_ci_single}" - { - echo "CI Update on ${MACHINE_ID^} at $(date +'%D %r')" || true - echo "=================================================" - echo "PR:${pr} Reset to ${MACHINE_ID^}-Ready by user and is now restarting CI tests" || true - } >> "${output_ci_single}" - if 
[[ "${driver_PID}" -ne 0 ]]; then - echo "Driver PID: ${driver_PID} no longer running this build having it killed" - if [[ "${driver_HOST}" == "${host_name}" ]]; then - pstree_out="$(pstree -A -p "${driver_PID}")" - if [[ -n "${pstree_out}" ]]; then - #shellcheck disable=SC2312 - echo -e "${pstree_out}" | grep -Pow "(?<=\()[0-9]+(?=\))" | xargs kill - fi - else - # Check if the driver is still running on the head node; if so, kill it and all child processes - #shellcheck disable=SC2029 - ssh "${driver_HOST}" "pstree -A -p \"${driver_PID}\" | grep -Eow \"[0-9]+\" | xargs kill || echo \"Failed to kill process with PID: ${driver_PID}, it may not be valid.\"" - fi - { - echo "Driver PID: Requested termination of ${driver_PID} and children on ${driver_HOST}" - echo "Driver PID: has restarted as $$ on ${host_name}" - } >> "${output_ci_single}" - fi - - experiments=$(find "${pr_dir}/RUNTESTS/EXPDIR" -mindepth 1 -maxdepth 1 -type d) || true - if [[ -z "${experiments}" ]]; then - echo "No current experiments to cancel in PR: ${pr} on ${MACHINE_ID^}" >> "${output_ci_single}" - else - for case in ${experiments}; do - case_name=$(basename "${case}") - cancel_batch_jobs "${case_name}" - { - echo "Canceled all jobs for experiment ${case_name} in PR:${pr} on ${MACHINE_ID^}" - } >> "${output_ci_single}" - done - fi - first_line=$(head -n 1 "${output_ci_single}") - if [[ "${first_line}" != '```' ]]; then - sed -i "1 i\`\`\`" "${output_ci_single}" - fi - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --add_pr "${pr}" --dbfile "${pr_list_dbfile}" - fi -done - -pr_list="" -if [[ -f "${pr_list_dbfile}" ]]; then - pr_list=$("${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --list Open Ready) || true -fi -if [[ -z "${pr_list}" ]]; then - echo "no PRs open and 
ready for checkout/build .. exiting" - exit 0 -fi - - -############################################################# -# Loop throu all open PRs -# Clone, checkout, build, creat set of cases, for each -############################################################# - -for pr in ${pr_list}; do - # Skip pr's that are currently Building for when overlapping driver scripts are being called from within cron - pr_building=$("${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --display "${pr}" --dbfile "${pr_list_dbfile}" | grep Building) || true - if [[ -n "${pr_building}" ]]; then - continue - fi - id=$("${GH}" pr view "${pr}" --repo "${REPO_URL}" --json id --jq '.id') - pr_dir="${GFS_BASH_CI_ROOT}/PR/${pr}" - output_ci="${pr_dir}/output_ci_${id}" - output_ci_single="${GFS_BASH_CI_ROOT}/PR/${pr}/output_single.log" - driver_build_PID=$$ - driver_build_HOST=$(hostname -s) - "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Ready" --add-label "CI-${MACHINE_ID^}-Building" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --update_pr "${pr}" Open Building "${driver_build_PID}:${driver_build_HOST}" - rm -Rf "${pr_dir}" - mkdir -p "${pr_dir}" - { - echo "CI Update on ${MACHINE_ID^} at $(date +'%D %r')" || true - echo "============================================" - echo "Cloning and Building global-workflow PR: ${pr}" - echo "with PID: ${driver_build_PID} on host: ${driver_build_HOST}" - echo "" - } >> "${output_ci_single}" - first_line=$(head -n 1 "${output_ci_single}") - if [[ "${first_line}" != '```' ]]; then - sed -i "1 i\`\`\`" "${output_ci_single}" - fi - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" - set +e - "${ROOT_DIR}/dev/ci/scripts/clone-build_ci.sh" -p "${pr}" -d "${pr_dir}" -o "${output_ci}" - ci_status=$? 
- ################################################################## - # Checking for special case when Ready label was updated - # but a race condtion caused the clone-build_ci.sh to start - # and this instance fails before it was killed. In th case we - # we need to exit this instance of the driver script - ################################################################# - if [[ ${ci_status} -ne 0 ]]; then - build_PID_check=$("${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --display "${pr}" --dbfile "${pr_list_dbfile}" | awk '{print $4}' | cut -d":" -f1) || true - if [[ "${build_PID_check}" -ne "$$" ]]; then - echo "Driver build PID: ${build_PID_check} no longer running this build ... exiting" - exit 0 - fi - fi - set -e - if [[ ${ci_status} -eq 0 ]]; then - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --update_pr "${pr}" Open Built "0:0" - #setup space to put an experiment - # export RUNTESTS for yaml case files to pickup - export RUNTESTS="${pr_dir}/RUNTESTS" - rm -Rf "${pr_dir:?}/RUNTESTS/"* - - ############################################################# - # loop over every yaml file in the PR's ci/cases - # and create an run directory for each one for this PR loop - ############################################################# - HOMEgfs="${pr_dir}/global-workflow" - cd "${HOMEgfs}" - pr_sha=$(git rev-parse --short HEAD) - - for yaml_config in "${HOMEgfs}/dev/ci/cases/pr/"*.yaml; do - case=$(basename "${yaml_config}" .yaml) || true - # export pslot for yaml case files to pickup - export pslot="${case}_${pr_sha}" - rm -Rf "${STMP}/RUNDIRS/${pslot}" - set +e - export LOGFILE_PATH="${HOMEgfs}/dev/ci/scripts/create_experiment.log" - rm -f "${LOGFILE_PATH}" - yaml_case_file="${HOMEgfs}/dev/ci/cases/pr/${case}.yaml" - skip_hosts=$("${HOMEgfs}/dev/ci/scripts/utils/parse_yaml.py" --yaml "${yaml_case_file}" --key skip_ci_on_hosts --string) - if [[ "${skip_hosts}" == *"${MACHINE_ID}"* ]]; then - { - echo "Case setup: 
Skipped for experiment ${pslot}" || true - } >> "${output_ci}" - continue - fi - "${HOMEgfs}/dev/workflow/create_experiment.py" --yaml "${HOMEgfs}/dev/ci/cases/pr/${case}.yaml" --overwrite > "${LOGFILE_PATH}" 2>&1 - ci_status=$? - set -e - if [[ ${ci_status} -eq 0 ]]; then - { - echo "Case setup: Completed for experiment ${pslot}" || true - } >> "${output_ci}" - else - { - echo "*** Failed *** to create experiment: ${pslot} on ${MACHINE_ID^}" - echo "" - cat "${LOGFILE_PATH}" - } >> "${output_ci}" - "${GH}" pr edit "${pr}" --repo "${REPO_URL}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Failed" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}" - exit 1 - fi - done - - "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Running" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --update_pr "${pr}" Open Running "0:0" - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}" - - else # failed to clone and build - - { - echo "Failed on cloning and building global-workflowi PR: ${pr}" - echo "CI on ${MACHINE_ID^} failed to build on $(date) for repo ${REPO_URL}" || true - } >> "${output_ci}" - - "${GH}" pr edit "${pr}" --repo "${REPO_URL}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Failed" - "${ROOT_DIR}/dev/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" - - if [[ -f "${HOMEgfs}/sorc/logs/error.logs" ]]; then - gist_URL=$("${ROOT_DIR}/dev/ci/scripts/utils/ci_utils_wrapper.sh" publish_logs "PR_${pr}" "${HOMEgfs}/sorc" "${HOMEgfs}/sorc/logs/error.logs") - { - echo -e "\nError logs from build" - echo "Gist URL: ${gist_URL}" - } >> "${output_ci}" - fi - "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file 
"${output_ci}" - - fi - -done # looping over each open and labeled PR - -########################################## -# scrub working directory for older files -########################################## -# -#find "${GFS_BASH_CI_ROOT}/PR/*" -maxdepth 1 -mtime +3 -exec rm -rf {} \; diff --git a/dev/ci/scripts/driver_weekly.sh b/dev/ci/scripts/driver_weekly.sh index cd11ec1bc03..b5b8201b5ba 100755 --- a/dev/ci/scripts/driver_weekly.sh +++ b/dev/ci/scripts/driver_weekly.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eux ############################################################################################## diff --git a/dev/ci/scripts/run-check_ci.sh b/dev/ci/scripts/run_check_ci.sh similarity index 99% rename from dev/ci/scripts/run-check_ci.sh rename to dev/ci/scripts/run_check_ci.sh index fe85d275034..9e843d88517 100755 --- a/dev/ci/scripts/run-check_ci.sh +++ b/dev/ci/scripts/run_check_ci.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu diff --git a/dev/ci/scripts/run_ci.sh b/dev/ci/scripts/run_ci.sh deleted file mode 100755 index aa77a7dbf6b..00000000000 --- a/dev/ci/scripts/run_ci.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -set -eux - -##################################################################################### -# -# Script description: BASH script for checking for cases in a given PR and -# simply running rocotorun on each. This script is intended -# to run from within a cron job in the CI Managers account -# Abstract TODO -##################################################################################### - -HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
>/dev/null 2>&1 && pwd )" -scriptname=$(basename "${BASH_SOURCE[0]}") -echo "Begin ${scriptname} at $(date -u)" || true -export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' - -######################################################################### -# Set up runtime environment varibles for accounts on supproted machines -######################################################################### - -source "${HOMEgfs}/ush/detect_machine.sh" -case ${MACHINE_ID} in - hera | orion | hercules | wcoss2 | gaea) - echo "Running Automated Testing on ${MACHINE_ID}" - source "${HOMEgfs}/dev/ci/platforms/config.${MACHINE_ID}" - ;; - *) - echo "Unsupported platform. Exiting with error." - exit 1 - ;; -esac -set +x -export HOMEgfs -source "${HOMEgfs}/ush/module-setup.sh" -module use "${HOMEgfs}/modulefiles" -module load "module_gwsetup.${MACHINE_ID}" -module list -set -eux -rocotorun=$(command -v rocotorun) -if [[ -z ${rocotorun} ]]; then - echo "rocotorun not found on system" - exit 1 -else - echo "rocotorun being used from ${rocotorun}" -fi - -pr_list_dbfile="${GFS_BASH_CI_ROOT}/open_pr_list.db" - -pr_list="" -if [[ -f "${pr_list_dbfile}" ]]; then - pr_list=$("${HOMEgfs}/dev/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --list Open Running) || true - pr_list=$(echo "${pr_list}" | tr ' ' '\n' | head -n "${max_concurrent_pr}" | tr '\n' ' ') || true -fi -if [[ -z "${pr_list}" ]]; then - echo "no open and built PRs that are ready for the cases to advance with rocotorun .. 
exiting" - exit 0 -fi - -############################################################# -# Loop throu all PRs in PR List and look for expirments in -# the RUNTESTS dir and for each one run runcotorun on them -# only up to $max_concurrent_cases will advance at a time -############################################################# - -for pr in ${pr_list}; do - echo "Processing Pull Request #${pr} and looking for cases" - pr_dir="${GFS_BASH_CI_ROOT}/PR/${pr}" - # If the directory RUNTESTS is not present then - # setupexpt.py has no been run yet for this PR - if [[ ! -d "${pr_dir}/RUNTESTS" ]]; then - continue - fi - num_cases=0 - for pslot_dir in "${pr_dir}/RUNTESTS/EXPDIR/"*; do - if [[ ! -d "${pslot_dir}" ]]; then - continue - fi - ((num_cases=num_cases+1)) - # No more than two cases are going forward at a time for each PR - if [[ "${num_cases}" -gt "${max_concurrent_cases}" ]]; then - continue - fi - pslot=$(basename "${pslot_dir}") - xml="${pslot_dir}/${pslot}.xml" - db="${pslot_dir}/${pslot}.db" - echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" - "${rocotorun}" -v 10 -w "${xml}" -d "${db}" - done -done diff --git a/dev/ci/scripts/tests/test_create_experiment.py b/dev/ci/scripts/unittests/test_create_experiment.py similarity index 67% rename from dev/ci/scripts/tests/test_create_experiment.py rename to dev/ci/scripts/unittests/test_create_experiment.py index 6f1e1390f29..2483f4f3b90 100644 --- a/dev/ci/scripts/tests/test_create_experiment.py +++ b/dev/ci/scripts/unittests/test_create_experiment.py @@ -1,13 +1,16 @@ import os from shutil import rmtree - +from pathlib import Path +import sys from wxflow import Executable - -_here = os.path.dirname(__file__) - -HOMEgfs = os.path.abspath(os.path.join(os.path.abspath(_here), '../../../..')) -RUNDIR_FAKE = os.path.join(_here, 'testdata/RUNDIR') -ICSDIR_FAKE = os.path.join(_here, 'testdata/ICSDIR') +# update sys.path to include the utils directory for find_homegfs +sys.path.insert(0, 
str(Path(__file__).resolve().parent.parent / "utils")) +from find_homegfs import find_homegfs + +HOMEgfs = find_homegfs() +current_dir = os.path.dirname(os.path.abspath(__file__)) +RUNDIR_FAKE = os.path.join(current_dir, 'testdata/RUNDIR') +ICSDIR_FAKE = os.path.join(current_dir, 'testdata/ICSDIR') def test_create_experiment(): diff --git a/dev/ci/scripts/unittests/test_data/test_config.yaml b/dev/ci/scripts/unittests/test_data/test_config.yaml new file mode 100644 index 00000000000..797538bf44c --- /dev/null +++ b/dev/ci/scripts/unittests/test_data/test_config.yaml @@ -0,0 +1,21 @@ +# Test configuration file for parse_yaml.py unit tests +top_level: simple_value +nested: + key1: value1 + key2: value2 + deeper: + key3: value3 +numbers: + integer: 42 + float: 3.14 +list_data: + - item1 + - item2 + - item3 +complex: + nested_list: + - name: first + value: 1 + - name: second + value: 2 +template_value: "{{ HOMEgfs }}/some/path" diff --git a/dev/ci/scripts/unittests/test_find_homegfs.py b/dev/ci/scripts/unittests/test_find_homegfs.py new file mode 100644 index 00000000000..6ae75e490e8 --- /dev/null +++ b/dev/ci/scripts/unittests/test_find_homegfs.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +import unittest +import os +import sys +import tempfile +import shutil +from pathlib import Path +from find_homegfs import find_homegfs + +# Add parent directory to sys.path to import find_homegfs module +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +UTILS_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..', 'utils')) +sys.path.insert(0, UTILS_DIR) + + +class TestFindHOMEgfs(unittest.TestCase): + """Tests for the find_homegfs.py script""" + + def setUp(self): + # Create a temporary directory structure for testing + self.test_dir = tempfile.mkdtemp() + # Create a fake repo structure with .github directory + self.fake_repo_path = os.path.join(self.test_dir, "fake_repo") + os.makedirs(os.path.join(self.fake_repo_path, ".github")) + # Create a nested directory structure for 
testing + self.nested_dir = os.path.join(self.fake_repo_path, "dir1", "dir2", "dir3") + os.makedirs(self.nested_dir) + + def tearDown(self): + # Clean up the temporary directory + shutil.rmtree(self.test_dir) + + def test_find_homegfs_current_dir(self): + """Test find_homegfs when starting from the repo root""" + result = find_homegfs(self.fake_repo_path) + self.assertEqual(str(result), str(Path(self.fake_repo_path))) + + def test_find_homegfs_nested_dir(self): + """Test find_homegfs when starting from a nested directory""" + result = find_homegfs(self.nested_dir) + self.assertEqual(str(result), str(Path(self.fake_repo_path))) + + def test_find_homegfs_none_start_path(self): + """Test find_homegfs with None start_path (should use cwd)""" + # Save the current directory + original_dir = os.getcwd() + try: + # Change to the fake repo directory + os.chdir(self.fake_repo_path) + result = find_homegfs(None) + self.assertEqual(str(result), str(Path(self.fake_repo_path))) + finally: + # Restore the original directory + os.chdir(original_dir) + + def test_find_homegfs_not_found(self): + """Test find_homegfs when .github directory doesn't exist""" + # Create a directory outside the fake repo + outside_dir = os.path.join(self.test_dir, "outside") + os.makedirs(outside_dir) + + # Patch os.path.dirname to ensure we don't traverse beyond our test directory + real_dirname = os.path.dirname + + def mock_dirname(path): + result = real_dirname(path) + # If we're about to go above our test directory, return the same path + # to simulate reaching the filesystem root + if result == self.test_dir or os.path.dirname(result) == self.test_dir: + return path + return result + + original_dirname = os.path.dirname + os.path.dirname = mock_dirname + try: + with self.assertRaises(ValueError): + find_homegfs(outside_dir) + finally: + # Restore original function + os.path.dirname = original_dirname + + def test_with_string_path(self): + """Test find_homegfs with string path""" + result = 
find_homegfs(str(self.fake_repo_path)) + self.assertEqual(str(result), str(Path(self.fake_repo_path))) + + def test_with_path_object(self): + """Test find_homegfs with Path object""" + result = find_homegfs(Path(self.fake_repo_path)) + self.assertEqual(str(result), str(Path(self.fake_repo_path))) + + +if __name__ == '__main__': + print("Starting TestFindHOMEgfs tests...") + unittest.main(verbosity=2) diff --git a/dev/ci/scripts/unittests/test_parse_yaml.py b/dev/ci/scripts/unittests/test_parse_yaml.py new file mode 100644 index 00000000000..ca704e568ca --- /dev/null +++ b/dev/ci/scripts/unittests/test_parse_yaml.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +import unittest +import os +import sys +import subprocess +from pathlib import Path + +# Add parent directory to sys.path to import utils modules +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +UTILS_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..', 'utils')) +sys.path.insert(0, UTILS_DIR) + +# Path to the test data directory +TEST_DATA_DIR = os.path.join(SCRIPT_DIR, 'test_data') +TEST_CONFIG = os.path.join(TEST_DATA_DIR, 'test_config.yaml') +SCRIPT_PATH = os.path.join(SCRIPT_DIR, '..', 'utils', 'parse_yaml.py') + + +class TestParseYAML(unittest.TestCase): + """Tests for the parse_yaml.py script""" + + @classmethod + def setUpClass(cls): + # Ensure test_data directory exists + os.makedirs(TEST_DATA_DIR, exist_ok=True) + # Create test yaml file if it doesn't exist + if not os.path.exists(TEST_CONFIG): + with open(TEST_CONFIG, 'w') as f: + f.write('''# Test configuration file for parse_yaml.py unit tests +top_level: simple_value +nested: + key1: value1 + key2: value2 + deeper: + key3: value3 +numbers: + integer: 42 + float: 3.14 +list_data: + - item1 + - item2 + - item3 +complex: + nested_list: + - name: first + value: 1 + - name: second + value: 2 +template_value: "/path/to/homegfs/some/path"''') + + def test_cli_basic(self): + """Test the command-line interface with basic options""" + # Test 
retrieving a simple value + cmd = [sys.executable, SCRIPT_PATH, '-y', TEST_CONFIG, '-k', 'top_level'] + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Command failed: {result.stderr}") + self.assertEqual(result.stdout.strip(), 'simple_value') + + # Test retrieving a nested value + cmd = [sys.executable, SCRIPT_PATH, '-y', TEST_CONFIG, '-k', 'nested.key2'] + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Command failed: {result.stderr}") + self.assertEqual(result.stdout.strip(), 'value2') + + def test_cli_default_value(self): + """Test the --default option""" + # Test default value for non-existent key + cmd = [sys.executable, SCRIPT_PATH, '-y', TEST_CONFIG, '-k', 'missing.key', + '-d', 'default_value'] + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Command failed: {result.stderr}") + self.assertEqual(result.stdout.strip(), 'default_value') + + def test_cli_fail_on_missing(self): + """Test the --fail-on-missing option""" + # Test that non-existent key with --fail-on-missing fails + cmd = [sys.executable, SCRIPT_PATH, '-y', TEST_CONFIG, '-k', 'missing.key', + '--fail-on-missing'] + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 1, "Expected command to fail with code 1") + self.assertIn("not found", result.stderr) + + # JSON formatting test has been removed + + def test_cli_string_option(self): + """Test the --string option for list output""" + # Test string output for a list + cmd = [sys.executable, SCRIPT_PATH, '-y', TEST_CONFIG, '-k', 'list_data', + '--string'] + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Command failed: {result.stderr}") + # Each list item should be on a separate line + lines = result.stdout.strip().split('\n') + self.assertEqual(lines, ['item1', 'item2', 'item3']) + 
+ +if __name__ == '__main__': + print("Starting TestParseYAML tests...") + unittest.main(verbosity=2) diff --git a/dev/ci/scripts/tests/test_rocotostat.py b/dev/ci/scripts/unittests/test_rocotostat.py similarity index 100% rename from dev/ci/scripts/tests/test_rocotostat.py rename to dev/ci/scripts/unittests/test_rocotostat.py diff --git a/dev/ci/scripts/tests/test_setup.py b/dev/ci/scripts/unittests/test_setup.py similarity index 93% rename from dev/ci/scripts/tests/test_setup.py rename to dev/ci/scripts/unittests/test_setup.py index 38f14e11459..d8155d9bfb4 100755 --- a/dev/ci/scripts/tests/test_setup.py +++ b/dev/ci/scripts/unittests/test_setup.py @@ -3,10 +3,11 @@ from shutil import rmtree from wxflow import Executable, Configuration, ProcessError +from find_homegfs import find_homegfs -_here = os.path.dirname(__file__) -HOMEgfs = os.path.abspath(os.path.join(os.path.abspath(_here), '../../../..')) -RUNDIR = os.path.join(_here, 'testdata/RUNDIR') +HOMEgfs = find_homegfs() +current_dir = os.path.dirname(os.path.abspath(__file__)) +RUNDIR = os.path.join(current_dir, 'testdata/RUNDIR') pslot = "C48_ATM" account = "fv3-cpu" foobar = "foobar" diff --git a/dev/ci/scripts/tests/wxflow b/dev/ci/scripts/unittests/wxflow similarity index 100% rename from dev/ci/scripts/tests/wxflow rename to dev/ci/scripts/unittests/wxflow diff --git a/dev/ci/scripts/utils/ci_utils.sh b/dev/ci/scripts/utils/ci_utils.sh index 5bc6da43368..01b83b66a92 100755 --- a/dev/ci/scripts/utils/ci_utils.sh +++ b/dev/ci/scripts/utils/ci_utils.sh @@ -1,4 +1,13 @@ -#!/bin/env bash +#!/usr/bin/env bash + +# Determine HOMEgfs_ and source machine detection early +if [[ -z "${HOMEgfs_}" ]]; then + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + HOMEgfs_="$("${SCRIPT_DIR}/find_homegfs.py")" +fi +source "${HOMEgfs_}/ush/detect_machine.sh" + +# --- Existing functions --- function determine_scheduler() { if command -v sbatch &> /dev/null; then @@ -61,7 +70,7 @@ function get_pr_case_list () { # 
loop over every yaml file in the PR's ci/cases # and create an run directory for each one for this PR loop ############################################################# - for yaml_config in "${HOMEgfs}/dev/ci/cases/pr/"*.yaml; do + for yaml_config in "${HOMEgfs_}/dev/ci/cases/pr/"*.yaml; do case=$(basename "${yaml_config}" .yaml) || true echo "${case}" done @@ -114,29 +123,29 @@ function cancel_all_batch_jobs () { function create_experiment () { local yaml_config="${1}" - cd "${HOMEgfs}" || exit 1 + cd "${HOMEgfs_}" || exit 1 pr_sha=$(git rev-parse --short HEAD) case=$(basename "${yaml_config}" .yaml) || true export pslot=${case}_${pr_sha} if [[ ${MACHINE_ID} == "noaacloud" ]]; then - source "${HOMEgfs}/dev/ci/platforms/config.${PW_CSP}" + source "${HOMEgfs_}/dev/ci/platforms/config.${PW_CSP}" else - source "${HOMEgfs}/dev/ci/platforms/config.${MACHINE_ID}" + source "${HOMEgfs_}/dev/ci/platforms/config.${MACHINE_ID}" fi - source "${HOMEgfs}/dev/ush/gw_setup.sh" + source "${HOMEgfs_}/dev/ush/gw_setup.sh" # Remove RUNDIRS dir incase this is a retry (STMP now in host file) if [[ ${MACHINE_ID} == "noaacloud" ]]; then - STMP=$("${HOMEgfs}/dev/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs}/dev/workflow/hosts/${PW_CSP}pw.yaml" -k STMP -s) + STMP=$("${HOMEgfs_}/dev/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs_}/dev/workflow/hosts/${PW_CSP}pw.yaml" -k STMP -s) else - STMP=$("${HOMEgfs}/dev/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs}/dev/workflow/hosts/${MACHINE_ID}.yaml" -k STMP -s) + STMP=$("${HOMEgfs_}/dev/ci/scripts/utils/parse_yaml.py" -y "${HOMEgfs_}/dev/workflow/hosts/${MACHINE_ID}.yaml" -k STMP -s) fi echo "Removing ${STMP}/RUNDIRS/${pslot} directory incase this is a retry" rm -Rf "${STMP}/RUNDIRS/${pslot}" - "${HOMEgfs}/${system}/dev/workflow/create_experiment.py" --overwrite --yaml "${yaml_config}" + "${HOMEgfs_}/${system}/dev/workflow/create_experiment.py" --overwrite --yaml "${yaml_config}" } @@ -160,8 +169,8 @@ function publish_logs() { if [[ -n 
"${full_paths}" ]]; then # shellcheck disable=SC2027,SC2086 - ${HOMEgfs}/dev/ci/scripts/utils/publish_logs.py --file ${full_paths} --repo ${PR_header} > /dev/null - URL="$("${HOMEgfs}/dev/ci/scripts/utils/publish_logs.py" --file "${full_paths}" --gist "${PR_header}")" + ${HOMEgfs_}/dev/ci/scripts/utils/publish_logs.py --file ${full_paths} --repo ${PR_header} > /dev/null + URL="$("${HOMEgfs_}/dev/ci/scripts/utils/publish_logs.py" --file "${full_paths}" --gist "${PR_header}")" fi echo "${URL}" } @@ -178,7 +187,7 @@ function cleanup_experiment() { pslot=$(basename "${EXPDIR}") # Use the Python utility to get the required variables - read -r ARCDIR ATARDIR STMP COMROOT < <("${HOMEgfs}/dev/ci/scripts/utils/get_config_var.py" ARCDIR ATARDIR STMP COMROOT "${EXPDIR}") || true + read -r ARCDIR ATARDIR STMP COMROOT < <("${HOMEgfs_}/dev/ci/scripts/utils/get_config_var.py" ARCDIR ATARDIR STMP COMROOT "${EXPDIR}") || true rm -Rf "${ARCDIR:?}" rm -Rf "${ATARDIR:?}" @@ -187,14 +196,33 @@ function cleanup_experiment() { rm -Rf "${STMP}/RUNDIRS/${pslot:?}" } -function build_compute () { +function build () { - source "${HOMEgfs}/dev/ci/platforms/config.${MACHINE_ID}" + source "${HOMEgfs_}/dev/ci/platforms/config.${MACHINE_ID}" # TODO: when it's safe to build on C6 compute nodes again, do so if [[ "${MACHINE_ID}" == "gaeac6" ]]; then - "${HOMEgfs}/sorc/build_all.sh" -v -k all + "${HOMEgfs_}/sorc/build_all.sh" -v -k all else - "${HOMEgfs}/sorc/build_compute.sh" -A "${HPC_ACCOUNT}" -v all + "${HOMEgfs_}/sorc/build_compute.sh" -A "${HPC_ACCOUNT}" -v all fi } + +# --- Dispatch logic --- + +# Check if the script is being executed directly (not sourced) +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + # Script is being executed directly + utility_function="${1}" + shift # Remove the function name from the arguments list + + # Check if the first argument corresponds to a defined function + type_t="$(type -t "${utility_function}")" || true + if [[ "${type_t}" == "function" ]]; then + # Call 
the function with the remaining arguments + "${utility_function}" "$@" + else + echo "ERROR: Utility function ${utility_function} not found or not a function in ${BASH_SOURCE[0]}" >&2 + exit 1 + fi +fi diff --git a/dev/ci/scripts/utils/ci_utils_wrapper.sh b/dev/ci/scripts/utils/ci_utils_wrapper.sh deleted file mode 100755 index 28e76459704..00000000000 --- a/dev/ci/scripts/utils/ci_utils_wrapper.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." >/dev/null 2>&1 && pwd )" -source "${HOMEgfs}/ush/detect_machine.sh" - -utility_function="${1}" - -source "${HOMEgfs}/dev/ci/scripts/utils/ci_utils.sh" -${utility_function} "${@:2}" diff --git a/dev/ci/scripts/utils/find_homegfs.py b/dev/ci/scripts/utils/find_homegfs.py new file mode 100755 index 00000000000..f97f896d6b9 --- /dev/null +++ b/dev/ci/scripts/utils/find_homegfs.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Utility module to find the HOMEgfs (repository root) directory by traversing up +the file system until finding a directory that contains the .github subdirectory. +""" + +import os +from pathlib import Path + + +def find_homegfs(start_path=None): + """ + Find the HOMEgfs directory by traversing up the file system until + finding a directory that contains the .github subdirectory. + + Parameters + ---------- + start_path : str or Path, optional + The path to start searching from. If not provided, the current + directory will be used. + + Returns + ------- + Path + The full path to the HOMEgfs directory. + + Raises + ------ + ValueError + If the HOMEgfs directory cannot be found. 
+ """ + # If start_path is not provided, use the directory of the calling script + if start_path is None: + # Get the path of the calling script + start_path = os.getcwd() + + # Convert to Path object if it's a string + if isinstance(start_path, str): + start_path = Path(start_path) + else: + start_path = Path(start_path).resolve() + + # Start traversing up from the current directory + current_dir = start_path + + # Traverse up until we find .github directory or reach the filesystem root + while True: + # Check if .github exists in the current directory + if (current_dir / '.github').is_dir(): + return current_dir + + # Go up one level + parent_dir = current_dir.parent + + # If we've reached the root directory and haven't found .github + if parent_dir == current_dir: + raise ValueError( + "Could not find HOMEgfs directory. " + "Traversed up to the root without finding a .github directory." + ) + + current_dir = parent_dir + + +if __name__ == '__main__': + # Example usage when run as a script + try: + homegfs = find_homegfs() + print(homegfs) + except ValueError as e: + print(f"Error: {e}") diff --git a/dev/ci/scripts/utils/get_host_case_list.py b/dev/ci/scripts/utils/get_host_case_list.py index 59c62539181..c78a96397a9 100755 --- a/dev/ci/scripts/utils/get_host_case_list.py +++ b/dev/ci/scripts/utils/get_host_case_list.py @@ -5,28 +5,51 @@ import glob from wxflow import parse_j2yaml from wxflow import AttrDict +from find_homegfs import find_homegfs -_here = os.path.dirname(__file__) -_top = os.path.abspath(os.path.join(os.path.abspath(_here), '../../../..')) -if __name__ == '__main__': +def get_host_cases(host, homegfs=None): + """ + Get list of test cases supported on a host - if len(sys.argv) < 2: - print('Usage: get_host_case_list.py ') - sys.exit(1) - - host = sys.argv[1] + Args: + host (str): Host name to check + homegfs (str, optional): Path to the global-workflow repository root directory + Returns: + list: List of case names (without extension) supported on 
the host + """ + homegfs = homegfs or find_homegfs() case_list = [] - HOMEgfs = _top - data = AttrDict(HOMEgfs=_top) + + # Set up data for template rendering + data = AttrDict(HOMEgfs=homegfs) data.update(os.environ) - case_files = glob.glob(f'{HOMEgfs}/dev/ci/cases/pr/*.yaml') + # Get all case files + case_files = glob.glob(f'{homegfs}/dev/ci/cases/pr/*.yaml') + for case_yaml in case_files: + # Parse the case configuration case_conf = parse_j2yaml(path=case_yaml, data=data) + + # Skip cases that don't support this host if 'skip_ci_on_hosts' in case_conf: if host.lower() in [machine.lower() for machine in case_conf.skip_ci_on_hosts]: continue + + # Add the case name (without extension) to the list case_list.append(splitext(basename(case_yaml))[0]) - print(' '.join(case_list)) + + return case_list + + +if __name__ == '__main__': + # When run as a script, maintain the original behavior + if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'): + print('Usage: get_host_case_list.py ') + sys.exit(1) + + host = sys.argv[1] + cases = get_host_cases(host) + print(' '.join(cases)) diff --git a/dev/ci/scripts/utils/gitlab/badge-updater-pipeline.yml b/dev/ci/scripts/utils/gitlab/badge-updater-pipeline.yml new file mode 100644 index 00000000000..b7bd31e3752 --- /dev/null +++ b/dev/ci/scripts/utils/gitlab/badge-updater-pipeline.yml @@ -0,0 +1,79 @@ +# EMC-Bot Badge Updater Pipeline +# This pipeline is designed to be triggered by webhooks when other pipelines complete +# It handles updating GitHub Gists with pipeline status badges for NOAA's Global Workflow + +stages: + - update_badges + +update_all_badges: + image: bitnami/git:latest + stage: update_badges + script: + - | + # Install GitHub CLI and other required tools + apt-get update -qq && apt-get install -y -qq curl jq + curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg + chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg + echo "deb 
[arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null + apt-get update -qq && apt-get install -y -qq gh + + # Authenticate GitHub CLI + echo "$GITHUB_TOKEN" | gh auth login --with-token + + # Create a temporary directory for badges + mkdir -p badges + + # Get information about the triggering pipeline + echo "Processing pipeline event data..." + PIPELINE_PROJECT_ID="${CI_PIPELINE_SOURCE_PROJECT_ID:-none}" + PIPELINE_BRANCH="${CI_PIPELINE_SOURCE_BRANCH:-develop}" + + # Default to global-workflow project if not specified + if [ "$PIPELINE_PROJECT_ID" == "none" ]; then + PROJECT_PATH="NWS/Operations/NCEP/EMC/global-workflow" + else + # You could look up the project path from the ID if needed + PROJECT_PATH="NWS/Operations/NCEP/EMC/global-workflow" + fi + + # Update main develop branch pipeline badge + echo "Updating develop branch badge..." + curl -L "https://vlab.noaa.gov/gitlab-licensed/${PROJECT_PATH}/badges/${PIPELINE_BRANCH}/pipeline.svg" -o "badges/main_pipeline.svg" + gh gist edit "ab937691224bdf50427cbeca666bf67b" "badges/main_pipeline.svg" + + # Update machine-specific badges + # These badges enhance visibility into the operational status of the global weather modeling system + # across NOAA's high-performance computing environments + + # HERA - NOAA Research HPC + echo "Updating HERA badge..." + curl -L "https://vlab.noaa.gov/gitlab-licensed/${PROJECT_PATH}/badges/${PIPELINE_BRANCH}/pipeline.svg?job=build-hera" -o "badges/hera_badge.svg" + gh gist edit "hera_gist_id" "badges/hera_badge.svg" + + # GAEA-C6 - Department of Energy HPC at Oak Ridge + echo "Updating GAEA-C6 badge..." 
+ curl -L "https://vlab.noaa.gov/gitlab-licensed/${PROJECT_PATH}/badges/${PIPELINE_BRANCH}/pipeline.svg?job=build-gaeac6" -o "badges/gaeac6_badge.svg" + gh gist edit "gaeac6_gist_id" "badges/gaeac6_badge.svg" + + # WCOSS2 - NOAA Operational Weather and Climate Supercomputing System + echo "Updating WCOSS2 badge..." + curl -L "https://vlab.noaa.gov/gitlab-licensed/${PROJECT_PATH}/badges/${PIPELINE_BRANCH}/pipeline.svg?job=build-wcoss2" -o "badges/wcoss2_badge.svg" + gh gist edit "wcoss2_gist_id" "badges/wcoss2_badge.svg" + + # ORION - Navy HPC system + echo "Updating ORION badge..." + curl -L "https://vlab.noaa.gov/gitlab-licensed/${PROJECT_PATH}/badges/${PIPELINE_BRANCH}/pipeline.svg?job=build-orion" -o "badges/orion_badge.svg" + gh gist edit "orion_gist_id" "badges/orion_badge.svg" + + # Add future HPC systems here as they're integrated into the workflow + # This modular design allows for easy extension as computational resources expand + + echo "All badges updated successfully!" + variables: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # This allows the job to run even if GitLab can't access the gists for verification + allow_failure: true + + # Only run this when triggered by a webhook or manually + rules: + - if: $CI_PIPELINE_SOURCE == "trigger" || $CI_PIPELINE_SOURCE == "web" diff --git a/dev/ci/scripts/utils/gitlab/docs/README.md b/dev/ci/scripts/utils/gitlab/docs/README.md new file mode 100644 index 00000000000..70941433382 --- /dev/null +++ b/dev/ci/scripts/utils/gitlab/docs/README.md @@ -0,0 +1,108 @@ +# GitLab CI Pipeline Configuration + +This documentation describes the multi-modal GitLab CI pipeline configuration for the global-workflow project. + +## Pipeline Modalities + +The CI pipeline supports two different modalities: + +1. **Standard Pipeline** - The default pipeline that runs when triggered by normal GitLab events (push, merge requests) +2. 
**CTests Pipeline** - A specialized pipeline that runs when triggered via GitLab API from GitHub + +## Multi-Host Support + +Both pipeline modalities support running on multiple compute hosts using a shared configuration system. The pipeline is dynamically generated by the `generate_pipelines.py` script, which creates jobs for each supported host machine based on the test cases that each host supports. + +## Pipeline Structure + +Both modalities use the following stages: +- `build`: Builds the codebase (shared between modalities) +- `create_experiments`: Sets up experiments or ctests +- `run_tests`: Executes the test workflows +- `finalize`: Updates status badges + +## Standard Pipeline + +The standard pipeline when triggered normally: +- Builds the codebase for each machine +- Sets up experiments using configurations in `dev/ci/cases/pr/` +- Runs tests for each case on the appropriate machine +- Completes with final status reporting + +## CTests Pipeline + +The CTests pipeline when triggered via GitLab API from GitHub: +- Uses the same build jobs as the standard pipeline +- The build stage additionally sets up the CMake environment for ctests +- Skips the standard experiment creation +- Runs specialized CTest test cases on each machine + +### Currently Supported Functional CTests + +The pipeline currently supports the following tests: +- `C48_ATM_gfs_fcst_seg0` +- `C48_S2SW_gfs_fcst_seg0` +- `C48_S2SW_gfs_atmos_prod` + +NOTE: The input/output data needs to be updated for all of these tests. + +Additional tests will be added in the future from within the ctest framework in `dev/ctests`. + +## Shared Build Stage + +Both modalities share the same build stage, which: +1. Clones the repository +2. Builds the necessary components +3. Sets up the workspace for testing +4. If triggered by GitHub API, also prepares the CMake environment for ctests + +This shared approach ensures consistent build results while optimizing resource usage. 
+ +## Triggering the CTests Pipeline from GitHub + +To trigger the CTests pipeline from GitHub, use the GitLab API with the following parameters: + +```bash +curl -X POST \ + --header "Content-Type: application/json" \ + --header "PRIVATE-TOKEN: <YOUR_TOKEN>" \ + "https://vlab.noaa.gov/gitlab-licensed/NWS/Operations/NCEP/EMC/global-workflow/api/v4/projects/<PROJECT_ID>/trigger/pipeline" \ + --data '{ + "ref": "<BRANCH_NAME>", + "variables": { + "GITHUB_API_TRIGGER": "true", + "PR_NUMBER": "<PR_NUMBER>", + "GITHUB_REPO_URL": "<GITHUB_REPO_URL>" + } + }' +``` + +Replace the following: +- `<YOUR_TOKEN>`: Your GitLab API token with appropriate permissions +- `<PROJECT_ID>`: The GitLab project ID for global-workflow +- `<BRANCH_NAME>`: The branch to run tests on +- `<PR_NUMBER>`: The GitHub PR number (use "0" for non-PR runs) +- `<GITHUB_REPO_URL>`: The GitHub repository URL + +## Pipeline Configuration Files + +The configuration is split across multiple files: + +1. **Main Pipeline File** (`.gitlab-ci.yml`): + - Contains the common stages and variables + - Includes the appropriate modality file based on trigger + - Contains the build template shared by both modalities + - Contains the finalize stage job + +2. **Cases Configuration** (`.gitlab-ci-cases.yml`): + - Contains job templates specific to standard test cases + - Includes templates for running experiments + +3. **CTests Configuration** (`.gitlab-ci-ctests.yml`): + - Contains specific configurations for CTests + - Included conditionally when triggered via GitHub API + +2. 
When the pipeline runs: + - GitLab detects if `GITHUB_API_TRIGGER` is set to "true" + - If true, includes the `.gitlab-ci-ctests.yml` file + - Jobs check this variable to determine if they should run diff --git a/dev/ci/scripts/utils/launch_gitlab_runner.sh b/dev/ci/scripts/utils/gitlab/launch_gitlab_runner.sh similarity index 91% rename from dev/ci/scripts/utils/launch_gitlab_runner.sh rename to dev/ci/scripts/utils/gitlab/launch_gitlab_runner.sh index 732c3d26f6f..5a4224b5793 100755 --- a/dev/ci/scripts/utils/launch_gitlab_runner.sh +++ b/dev/ci/scripts/utils/gitlab/launch_gitlab_runner.sh @@ -13,8 +13,9 @@ set -e # Usage: ./launch_gitlab_runner.sh [register|run|unregister] [token] ######################################################################### -# Set the HOMEGFS_ variable to the root directory of the global workflow -HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." >/dev/null 2>&1 && pwd )" +# Set the HOMEgfs_ variable to the root directory of the global workflow +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HOMEgfs_="$("${SCRIPT_DIR}/../find_homegfs.py")" # Get the hostname of the current machine host="$(hostname)" @@ -23,7 +24,7 @@ host="$(hostname)" ######################################################################### # Source the detect_machine.sh script to determine the MACHINE_ID -source "${HOMEGFS_}/ush/detect_machine.sh" +source "${HOMEgfs_}/ush/detect_machine.sh" # Check the MACHINE_ID and set up the environment accordingly case "${MACHINE_ID}" in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) @@ -38,7 +39,7 @@ esac # Source the platform-specific configuration file # This file contains platform-specific variables such as GITLAB_URL, GITLAB_CI_BUILDS_DIR, # and GITLAB_RUNNER_DIR which are required for runner registration and execution -source "${HOMEGFS_}/dev/ci/platforms/config.${MACHINE_ID}" +source "${HOMEgfs_}/dev/ci/platforms/config.${MACHINE_ID}" # Change to the GitLab runner directory defined in the 
platform config cd "${GITLAB_RUNNER_DIR}" || exit 1 @@ -91,7 +92,7 @@ if [[ "${1}" == "register" ]]; then # --builds-dir: Directory where builds will be stored (from config.MACHINE_ID) # --custom_build_dir-enabled: Enable custom build directories # --request-concurrency: Number of concurrent requests that can be handled - ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_CI_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 + ./gitlab-runner register -n -t "${GITLAB_RUNNER_TOKEN}" --url "${GITLAB_URL}" --executor shell --shell bash --builds-dir "${GITLAB_BUILDS_DIR}" --custom_build_dir-enabled true --request-concurrency 24 # Set the concurrent job limit in the GitLab runner config file sed -i 's/concurrent.*/concurrent = 24/' ~/.gitlab-runner/config.toml @@ -104,7 +105,7 @@ fi if [[ "${1}" == "run" ]]; then # --working-directory: Directory where the runner is launched and keeps its working files (from config.$MACHINE_ID) - # do not confuse this with GitLabs CI_BUILDS_DIR which is designate by GFS_CI_BUILDS_DIR and is where the builds are stored + # do not confuse this with GitLab's CI_BUILDS_DIR which is designated by GW_BUILDS_DIR and is where the builds are stored COMMAND="nohup ./gitlab-runner run --working-directory ${GITLAB_RUNNER_DIR}" echo -e "Running gitlab-runner with the command:\n${COMMAND}\nsee log ${GITLAB_LOG}" echo -e "Running gitlab-runner with the command:${COMMAND}" >& "${GITLAB_LOG}" diff --git a/dev/ci/scripts/utils/launch_java_agent.sh b/dev/ci/scripts/utils/launch_java_agent.sh index 8e348d09b60..8727533bf6a 100755 --- a/dev/ci/scripts/utils/launch_java_agent.sh +++ b/dev/ci/scripts/utils/launch_java_agent.sh @@ -65,14 +65,16 @@ controller_url="https://jenkins.epic.oarcloud.noaa.gov" controller_user=${controller_user:-"terry.mcguinness"} controller_user_auth_token="jenkins_token" -HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." 
>/dev/null 2>&1 && pwd )" +# Set the HOMEgfs_ variable to the root directory of the global workflow +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HOMEgfs_="$("${SCRIPT_DIR}/find_homegfs.py")" host=$(hostname) ######################################################################### # Set up runtime environment varibles for accounts on supproted machines ######################################################################### -source "${HOMEGFS_}/ush/detect_machine.sh" +source "${HOMEgfs_}/ush/detect_machine.sh" case ${MACHINE_ID} in hera | orion | hercules | wcoss2 | gaeac5 | gaeac6 ) echo "Launch Jenkins Java Controler on ${MACHINE_ID}";; @@ -86,14 +88,14 @@ esac LOG=launched_agent-$(date +%Y%m%d%M).log rm -f "${LOG}" -HOMEgfs="${HOMEGFS_}" source "${HOMEGFS_}/ush/module-setup.sh" -module use "${HOMEGFS_}/modulefiles" +HOMEgfs="${HOMEgfs_}" source "${HOMEgfs_}/ush/module-setup.sh" +module use "${HOMEgfs_}/modulefiles" module load "module_gwsetup.${MACHINE_ID}" if [[ ${MACHINE_ID} == "noaacloud" ]]; then - source "${HOMEGFS_}/dev/ci/platforms/config.${PW_CSP}" + source "${HOMEgfs_}/dev/ci/platforms/config.${PW_CSP}" else - source "${HOMEGFS_}/dev/ci/platforms/config.${MACHINE_ID}" + source "${HOMEgfs_}/dev/ci/platforms/config.${MACHINE_ID}" fi JAVA_HOME="${JENKINS_AGENT_LAUNCH_DIR}/JAVA/jdk-17.0.10" diff --git a/dev/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt b/dev/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt deleted file mode 100644 index ddc6b057060..00000000000 --- a/dev/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt +++ /dev/null @@ -1,5 +0,0 @@ -sudo yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm -sudo yum -y install git -/contrib/Terry.McGuinness/SETUP/provision_runner.sh -ALLNODES -/contrib/Terry.McGuinness/SETUP/mount-epic-contrib.sh \ No newline at end of file diff --git a/dev/ci/scripts/utils/parallel_works/provision_runner.sh 
b/dev/ci/scripts/utils/parallel_works/provision_runner.sh deleted file mode 100755 index cac18c93156..00000000000 --- a/dev/ci/scripts/utils/parallel_works/provision_runner.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# This script provisions a GitHub Actions runner on a Rocky or CentOS system. -# It performs the following steps: -# 1. Checks the operating system from /etc/os-release. -# 2. Verifies if the operating system is either Rocky or CentOS. -# 3. Checks if an actions-runner process is already running for the current user. -# 4. Copies the actions-runner tar file from a specified directory to the home directory. -# 5. Extracts the tar file and starts the actions-runner in the background. -# -# The actions-runner tar file contains the necessary binaries and scripts to run -# a GitHub Actions runner. It is specific to the operating system and is expected -# to be located in the /contrib/${CI_USER}/SETUP/ directory. - -CI_USER="Terry.McGuinness" - -# Get the Operating System name from /etc/os-release -OS_NAME=$(grep -E '^ID=' /etc/os-release | sed -E 's/ID="?([^"]*)"?/\1/') || true - -# Check if the OS is Rocky or CentOS -if [[ "${OS_NAME}" == "rocky" || "${OS_NAME}" == "centos" ]]; then - echo "Operating System is ${OS_NAME}" -else - echo "Unsupported Operating System: ${OS_NAME}" - exit 1 -fi - -running=$(pgrep -u "${USER}" run-helper -c) || true -if [[ "${running}" -gt 0 ]]; then - echo "actions-runner is already running" - exit -fi - -cp "/contrib/${CI_USER}/SETUP/actions-runner_${OS_NAME}.tar.gz" "${HOME}" -cd "${HOME}" || exit -tar -xf "actions-runner_${OS_NAME}.tar.gz" -cd actions-runner || exit -d=$(date +%Y-%m-%d-%H:%M) -nohup ./run.sh >& "run_nohup${d}.log" & diff --git a/dev/ci/scripts/utils/parse_yaml.py b/dev/ci/scripts/utils/parse_yaml.py index c22ad69983e..96969d89ad1 100755 --- a/dev/ci/scripts/utils/parse_yaml.py +++ b/dev/ci/scripts/utils/parse_yaml.py @@ -9,9 +9,7 @@ from wxflow import AttrDict, parse_j2yaml from argparse import 
ArgumentParser from pathlib import Path - -_here = os.path.dirname(__file__) -_top = os.path.abspath(os.path.join(os.path.abspath(_here), '../../../..')) +from find_homegfs import find_homegfs description = """parse yaml file and return value of key""" @@ -28,6 +26,8 @@ def parse_args(): parser.add_argument('-y', '--yaml', help='full path to yaml file to parse', type=Path, required=True) parser.add_argument('-k', '--key', help='key to return value of', type=str, required=True) parser.add_argument('-s', '--string', help='output results as strings', action="store_true", required=False) + parser.add_argument('-d', '--default', help='default value to return if key is not found', type=str, required=False) + parser.add_argument('-f', '--fail-on-missing', help='exit with code 1 if key is not found', action="store_true", required=False) return parser.parse_args() @@ -43,9 +43,8 @@ def yq(yamlfile, key): The value of the specified key in the yaml file. """ - data = AttrDict(HOMEgfs=_top) - data.update({'HOMEgfs': _top}) - ydict = parse_j2yaml(path=yamlfile, data=data) + HOMEgfs = find_homegfs() + ydict = parse_j2yaml(path=yamlfile, data={'HOMEgfs': HOMEgfs}) if key == 'all': return ydict list_keys = key.split('.') @@ -63,6 +62,19 @@ def yq(yamlfile, key): args = parse_args() values = yq(args.yaml, args.key) + + # Handle missing values + if values is None: + if hasattr(args, 'fail_on_missing') and args.fail_on_missing: + print(f"Error: Key '{args.key}' not found in {args.yaml}", file=sys.stderr) + sys.exit(1) + elif hasattr(args, 'default') and args.default is not None: + values = args.default + else: + # For shell script usage, an empty output is often more useful than "None" + sys.exit(0) + + # Output formatting if args.string and isinstance(values, list): for value in values: print(value) diff --git a/dev/ci/scripts/utils/pr_list_database.py b/dev/ci/scripts/utils/pr_list_database.py deleted file mode 100755 index 3b53d211344..00000000000 --- 
a/dev/ci/scripts/utils/pr_list_database.py +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import os -from typing import List -from wxflow import SQLiteDB, SQLiteDBError -from githubpr import GitHubPR -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, REMAINDER -import argparse - - -def full_path(string): - """ - full_path Get the absolute path of a file or directory. - Parameters - ---------- - string : str - The relative path of the file or directory. - - Returns - ------- - str - The absolute path of the file or directory. - - Raises - ------ - NotADirectoryError - If the provided string does not represent a valid file or directory. - """ - - if os.path.isfile(string) or os.path.isdir(os.path.dirname(string)): - return os.path.abspath(string) - else: - raise NotADirectoryError(string) - - -def create_table(db: SQLiteDB): - """ - Create a new table in a database. - - Parameters - ---------- - db : SQLiteDB - The database to create. - """ - - db.create_table('pr_list', ['pr INTEGER PRIMARY KEY UNIQUE', 'state TEXT', 'status TEXT', 'reset_id INTEGER', 'cases TEXT']) - - -def add_pr(ci_database: SQLiteDB, pr: str) -> bool: - """ - Add a pull request to the database. - - Parameters - ---------- - ci_database : SQLiteDB - The database to add the pull request to. - - pr : str - The pull request to add. - """ - - entities = (pr, 'Open', 'Ready', 0, 'ci_repo') - try: - ci_database.insert_data('pr_list', entities) - return True - except (SQLiteDBError.IntegrityError) as e: - if 'unique' in str(e).lower(): - print(f"pr {pr} already is in list: nothing added") - return False - - -def update_pr(ci_database: SQLiteDB, args: argparse.Namespace): - """ - Update a pull request in the database. - - Parameters - ---------- - ci_database : SQLiteDB - The database to update the pull request in. - - args : argparse.Namespace - The command line arguments. 
- """ - - if len(args.update_pr) < 2: - print(f"update_pr must have at least one vaule to update") - sys.exit(0) - - update_list = ['state', 'status', 'reset_id', 'cases'] - for value in args.update_pr[1:]: - update = update_list.pop(0) - ci_database.update_data('pr_list', update, value, 'pr', args.update_pr[0]) - - -def display_db(ci_database: SQLiteDB, display: List[str]) -> list: - """ - Display the database. - - Parameters - ---------- - ci_database : SQLiteDB - The database to display. - - display : argparse.Namespace - The command line arguments. - - Returns - ------- - list - The rows of the database. - """ - - values = [] - if len(display) == 1: - rows = ci_database.fetch_data('pr_list', ['pr', 'state', 'status', 'reset_id', 'cases'], f"pr = '{display[0]}'") - if len(display) == 2: - rows = ci_database.fetch_data('pr_list', ['pr'], f"state = '{display[0]}' AND status = '{display[1]}'") - if len(display) == 0: - rows = ci_database.fetch_data('pr_list', ['pr', 'state', 'status', 'reset_id', 'cases']) - for row in rows: - values.append(' '.join(map(str, row))) - - return values - - -def update_database(ci_database: SQLiteDB) -> list: - """ - Update the database from the GitHub PRs - - only PRs from host machine are added to the database - - if the PR is already in the database it its added to the kill list - - Parameters - ---------- - ci_database : SQLiteDB - The database to update. - - Returns - ------- - list - The kill list of pull requests. - """ - - gh = GitHubPR() - pr_ready_list, pr_kill_list = gh.get_open_pr_list() - for pr in pr_ready_list: - if not add_pr(ci_database, str(pr)): - if pr not in pr_kill_list: - pr_kill_list.append(pr) - pr_kill_list = list(set(pr_kill_list)) - return pr_kill_list - - -def input_args(): - """ - Parse command line arguments. - - Returns - ------- - argparse.Namespace - The parsed command line arguments. 
- """ - description = """Arguments for creating and updating db file for pr states - """ - - parser = ArgumentParser(description=description, - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument('--dbfile', help='SQLite3 database file with PR list', type=full_path) - parser.add_argument('--create', help='create sqlite file for pr list status', action='store_true', required=False) - parser.add_argument('--add_pr', nargs=1, metavar='PR', help='add new pr to list (defults to: Open,Ready)', required=False) - parser.add_argument('--remove_pr', nargs=1, metavar='PR', help='removes pr from list', required=False) - parser.add_argument('--update_pr', nargs=REMAINDER, metavar=('pr', 'state', 'status', 'reset_id', 'cases'), - help='updates state and status of a given pr', required=False) - parser.add_argument('--display', nargs='*', help='output pr table', required=False) - parser.add_argument('--list', nargs=2, metavar=('state', 'status'), required=False) - parser.add_argument('--update_database', help='use labels from Open GitHub PRs to update database state and produces a kill list', - action='store_true', required=False) - args = parser.parse_args() - return args - - -if __name__ == '__main__': - - args = input_args() - - if not args.create: - if not os.path.isfile(args.dbfile): - print(f'Error: {args.dbfile} does not exsist') - sys.exit(-1) - - ci_database = SQLiteDB(args.dbfile) - ci_database.connect() - - if args.create: - create_table(ci_database) - if args.add_pr: - add_pr(ci_database, args.add_pr[0]) - if args.update_pr: - update_pr(ci_database, args) - if args.remove_pr: - ci_database.remove_data('pr_list', 'PR', args.remove_pr[0]) - if args.display is not None: - for rows in display_db(ci_database, args.display): - print(rows) - if args.list: - for rows in display_db(ci_database, [args.list[0], args.list[1]]): - print(rows, end=' ') - print() - if args.update_database: - pr_kill_list = update_database(ci_database) - for pr in pr_kill_list: - 
print(pr, end=' ') - print() - - ci_database.disconnect() diff --git a/dev/ctests/CMakeLists.txt b/dev/ctests/CMakeLists.txt index 3ec286852be..25de0fce593 100644 --- a/dev/ctests/CMakeLists.txt +++ b/dev/ctests/CMakeLists.txt @@ -85,18 +85,18 @@ function(AddJJOBTest) add_test(NAME test_${TEST_NAME}_stage COMMAND ./stage.sh ${TEST_NAME} ${ARG_TEST_DATE} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/scripts) - set_tests_properties(test_${TEST_NAME}_stage PROPERTIES DEPENDS test_${TEST_NAME}_setup LABELS "${ARG_CASE};${ARG_JOB}") + set_tests_properties(test_${TEST_NAME}_stage PROPERTIES DEPENDS test_${TEST_NAME}_setup LABELS "${ARG_CASE}_${ARG_JOB}") add_test(NAME test_${TEST_NAME}_execute COMMAND ./execute.sh ${TEST_NAME} ${ARG_JOB} ${ARG_TEST_DATE} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/scripts) - set_tests_properties(test_${TEST_NAME}_execute PROPERTIES DEPENDS test_${TEST_NAME}_stage LABELS "${ARG_CASE};${ARG_JOB}") + set_tests_properties(test_${TEST_NAME}_execute PROPERTIES DEPENDS test_${TEST_NAME}_stage LABELS "${ARG_CASE}_${ARG_JOB}") # TODO - This is a stub for the validation step add_test(NAME test_${TEST_NAME}_validate COMMAND ./validate.sh ${TEST_NAME} ${ARG_TEST_DATE} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/scripts) - set_tests_properties(test_${TEST_NAME}_validate PROPERTIES DEPENDS test_${TEST_NAME}_execute LABELS "${ARG_CASE};${ARG_JOB}") + set_tests_properties(test_${TEST_NAME}_validate PROPERTIES DEPENDS test_${TEST_NAME}_execute LABELS "${ARG_CASE}_${ARG_JOB}") endfunction() AddJJOBTest( diff --git a/dev/ctests/scripts/execute.sh.in b/dev/ctests/scripts/execute.sh.in index 52ae03a43ae..8dc17f2c654 100755 --- a/dev/ctests/scripts/execute.sh.in +++ b/dev/ctests/scripts/execute.sh.in @@ -28,7 +28,7 @@ lack_of_job_count=0 LACK_OF_JOB_LIMIT=5 while true; do - job_status=$(sacct -j "${job_id}" --format=State --noheader -n | head -1) || true + job_status=$(sacct -j "${job_id}" --format=State --noheader -n | head -1 | tr -cd '[:alpha:]' | 
xargs) || true if [[ -n "${job_status}" ]]; then echo "Job ${job_id} found in sacct." break @@ -46,8 +46,7 @@ done timeout=0 TIMEOUT=60 while true; do - # Trim trailing spaces from job_status - job_status=$(sacct -j "${job_id}" --format=State --noheader -n | head -1 | xargs) || true + job_status=$(sacct -j "${job_id}" --format=State --noheader -n | head -1 | tr -cd '[:alpha:]' | xargs) || true if [[ "${job_status}" == "COMPLETED" ]]; then echo "Job ${job_id} completed successfully." break diff --git a/dev/ctests/scripts/setup.sh.in b/dev/ctests/scripts/setup.sh.in index 43ac2b7b01b..10a263fac84 100755 --- a/dev/ctests/scripts/setup.sh.in +++ b/dev/ctests/scripts/setup.sh.in @@ -19,7 +19,7 @@ pslot="${TEST_NAME}" \ RUNTESTS="${RUNTESTS}" \ ICSDIR_ROOT="${ICSDIR_ROOT}" \ HPC_ACCOUNT="${HPC_ACCOUNT}" \ -"${HOMEgfs}/workflow/create_experiment.py" --yaml "${YAML_FILE}" --overwrite +"${HOMEgfs}/dev/workflow/create_experiment.py" --yaml "${YAML_FILE}" --overwrite rc=$? if [[ "${rc}" -ne 0 ]]; then set +x