From bd2d63600aac55e0ed75bf2dca296b19622ddff2 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Mon, 21 Mar 2022 15:27:42 -0500 Subject: [PATCH 01/11] Use new python env on orion --- modulefiles/GDAS/orion.lua | 47 +++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/modulefiles/GDAS/orion.lua b/modulefiles/GDAS/orion.lua index 34e329334..c68a1e093 100644 --- a/modulefiles/GDAS/orion.lua +++ b/modulefiles/GDAS/orion.lua @@ -1,4 +1,4 @@ --- NOAA HPC Orion Modulefile for UFS-DA +-- NOAA HPC Orion Modulefile for GDASApp help([[ ]]) @@ -11,19 +11,50 @@ setenv('JEDI_OPT', jedi_opt) local jedi_core = pathJoin(jedi_opt, 'modulefiles/core') prepend_path("MODULEPATH", jedi_core) -load('jedi/intel-impi') +prepend_path("MODULEPATH", '/work2/noaa/da/python/opt/modulefiles/stack') +load("cmake/3.18.1") +load("git/2.28.0") +load("git-lfs/2.13.2") + +load("jedi-intel/2020.2") +load("mkl/2020.2") +load("szip/2.1.1") +load("zlib/1.2.11") +load("udunits/2.2.28") +load("gsl_lite/0.37.0") +load("jedi-impi/2020.2") + +load("hdf5/1.12.0") +load("pnetcdf/1.12.1") +load("netcdf/4.7.4") + +load("boost-headers/1.68.0") +load("eigen/3.3.7") +load("bufr/noaa-emc-11.5.0") +load("pybind11/2.7.0") +load("nccmp/1.8.7.0") +load("pio/2.5.1-debug") + +load("ecbuild/ecmwf-3.6.1") +load("eckit/ecmwf-1.16.0") +load("fckit/ecmwf-0.9.2") +load("atlas/ecmwf-0.24.1") + +load("hpc") +load("miniconda3") +load("gdasapp") + +setenv("CC","mpiicc") +setenv("FC","mpiifort") +setenv("CXX","mpiicpc") local mpiexec = '/opt/slurm/bin/srun' local mpinproc = '-n' setenv('MPIEXEC_EXEC', mpiexec) setenv('MPIEXEC_NPROC', mpinproc) --- add R2D2 and SOLO to PYTHONPATH -prepend_path("PYTHONPATH", "/work2/noaa/da/cmartin/UFSDA/python/local/lib/python3.9/site-packages") --- add R2D2 to path -prepend_path("PATH", "/work2/noaa/da/cmartin/UFSDA/python/local/bin") whatis("Name: ".. pkgName) whatis("Version: " .. pkgVersion) -whatis("Category: UFS-DA") -whatis("Description: Load JEDI-Stack for UFS-DA") +whatis("Category: GDASApp") +whatis("Description: Load all libraries needed for GDASApp") From b9d6e10bc72f1250e1098c2747517c449a9acf1c Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 10:42:27 -0500 Subject: [PATCH 02/11] Start of CI cron scripts --- ush/HPC-CI/driver.sh | 88 ++++++++++++++++++++++++++++++++++++++++++++ ush/HPC-CI/orion.sh | 10 +++++ ush/HPC-CI/run_ci.sh | 59 +++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100755 ush/HPC-CI/driver.sh create mode 100644 ush/HPC-CI/orion.sh create mode 100755 ush/HPC-CI/run_ci.sh diff --git a/ush/HPC-CI/driver.sh b/ush/HPC-CI/driver.sh new file mode 100755 index 000000000..646f27046 --- /dev/null +++ b/ush/HPC-CI/driver.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +my_dir="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" + +# ============================================================================== +usage() { + set +x + echo + echo "Usage: $0 -t -h" + echo + echo " -t target/machine script is running on DEFAULT: $(hostname)" + echo " -h display this message and quit" + echo + exit 1 +} + +# ============================================================================== +# First, set up runtime environment + +export TARGET="$(hostname)" + +while getopts "t:h" opt; do + case $opt in + t) + TARGET=$OPTARG + ;; + h|\?|:) + usage + ;; + esac +done + +case ${TARGET} in + hera | orion) + echo "Running Automated Testing on $TARGET" + source $MODULESHOME/init/sh + source $my_dir/${TARGET}.sh + module purge + module use $GDAS_MODULE_USE + module load GDAS/$TARGET + module list + ;; + *) + echo "Unsupported platform. Exiting with error." + exit 1 + ;; +esac + + +# ============================================================================== +# pull on the repo and get list of open PRs +cd $GDAS_CI_ROOT/repo +CI_LABEL="${GDAS_CI_HOST}-RT" +gh pr list --label "$CI_LABEL" --state "open" | awk '{print $1;}' > $GDAS_CI_ROOT/open_pr_list +open_pr_list=$(cat $GDAS_CI_ROOT/open_pr_list) + +# ============================================================================== +# clone, checkout, build, test, etc. +repo_url="https://github.com/NOAA-EMC/GDASApp.git" +# loop through all open PRs +for pr in $open_pr_list; do + echo "Processing Pull Request #${pr}" + mkdir -p $GDAS_CI_ROOT/PR/$pr + cd $GDAS_CI_ROOT/PR/$pr + + # clone copy of repo + git clone $repo_url + cd GDASApp + + # checkout pull request + git pull + gh pr checkout $pr + + # get commit hash + commit=$(git log --pretty=format:'%h' -n 1) + if [ -f "$GDAS_CI_ROOT/PR/$pr/commit" ]; then + oldcommit=$(cat $GDAS_CI_ROOT/PR/$pr/commit) + if [ $oldcommit == $commit ]; then + # do no more for this PR, as the commit has already been tested + continue + fi + fi + echo "$commit" > $GDAS_CI_ROOT/PR/$pr/commit + + # run build and testing command + $my_dir/run_ci.sh -d $GDAS_CI_ROOT/PR/$pr/GDASApp -o $GDAS_CI_ROOT/PR/$pr/output_${commit} +done + diff --git a/ush/HPC-CI/orion.sh b/ush/HPC-CI/orion.sh new file mode 100644 index 000000000..b07ff5550 --- /dev/null +++ b/ush/HPC-CI/orion.sh @@ -0,0 +1,10 @@ +GDAS_CI_ROOT=/work2/noaa/stmp/cmartin/CI/GDASApp +GDAS_CI_HOST='orion' +GDAS_MODULE_USE=$GDAS_CI_ROOT/repo/modulefiles +export SLURM_ACCOUNT=da-cpu +export SALLOC_ACCOUNT=$SLURM_ACCOUNT +export SBATCH_ACCOUNT=$SLURM_ACCOUNT +export SLURM_QOS=debug +export SLURM_EXCLUSIVE=user +export OMP_NUM_THREADS=1 +ulimit -s unlimited diff --git a/ush/HPC-CI/run_ci.sh b/ush/HPC-CI/run_ci.sh new file mode 100755 index 000000000..40281aae5 --- /dev/null +++ b/ush/HPC-CI/run_ci.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#set -eu + +# ============================================================================== +usage() { + set +x + echo + echo "Usage: $0 -d -o -h" + echo + echo " -d Run build and ctest for clone in " + echo " -o Path to output message detailing results of CI tests" + echo " -h display this message and quit" + echo + exit 1 +} + +# ============================================================================== +while getopts "d:o:h" opt; do + case $opt in + d) + repodir=$OPTARG + ;; + o) + outfile=$OPTARG + ;; + h|\?|:) + usage + ;; + esac +done + +# ============================================================================== +# start output file +echo "Automated ${TARGET} Pull Request Testing Results:" > $outfile +echo '```' >> $outfile +echo "Start: $(date) on $(hostname)" >> $outfile +# ============================================================================== +# run build script +cd $repodir +module purge +./build.sh -t $TARGET +build_status=$? +if [ $build_status -eq 0 ]; then + echo "Build: *SUCCESS*" >> $outfile + echo "Build: Completed at $(date)" >> $outfile +else + echo "Build: *FAILED*" >> $outfile + echo "Build: Failed at $(date)" >> $outfile + echo '```' >> $outfile + exit 1 +fi +# ============================================================================== +# run ctests +cd $repodir/build +echo "---------------------------------" >> $outfile +ctest --output-on-failure &>> $outfile +ctest_status=$? +echo '```' >> $outfile +exit $ctest_status From 14a2d1e1c6c88241d1956632039d27e6e87131e2 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 10:56:01 -0500 Subject: [PATCH 03/11] Add writing of comment message --- ush/HPC-CI/driver.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ush/HPC-CI/driver.sh b/ush/HPC-CI/driver.sh index 646f27046..e3085aa37 100755 --- a/ush/HPC-CI/driver.sh +++ b/ush/HPC-CI/driver.sh @@ -84,5 +84,10 @@ for pr in $open_pr_list; do # run build and testing command $my_dir/run_ci.sh -d $GDAS_CI_ROOT/PR/$pr/GDASApp -o $GDAS_CI_ROOT/PR/$pr/output_${commit} + ci_status=$? + git pr comment $pr --body-file $GDAS_CI_ROOT/PR/$pr/output_${commit} + if [ $ci_status -eq 0 ]; then + gh pr edit $pr --remove-label $CI_LABEL + fi done From 52a0c02511d1a2077dc11dc7d8c56e9c5c20482e Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 11:06:07 -0500 Subject: [PATCH 04/11] Make it a login shell for modules --- ush/HPC-CI/driver.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/HPC-CI/driver.sh b/ush/HPC-CI/driver.sh index e3085aa37..dd1b84b27 100755 --- a/ush/HPC-CI/driver.sh +++ b/ush/HPC-CI/driver.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash --login my_dir="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" From 5d9a9efca7a0ea0c8e7d6969dcb66ca8a7106ca4 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 13:35:39 -0500 Subject: [PATCH 05/11] Fix typo --- ush/HPC-CI/driver.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/HPC-CI/driver.sh b/ush/HPC-CI/driver.sh index dd1b84b27..1c14b72f4 100755 --- a/ush/HPC-CI/driver.sh +++ b/ush/HPC-CI/driver.sh @@ -85,7 +85,7 @@ for pr in $open_pr_list; do # run build and testing command $my_dir/run_ci.sh -d $GDAS_CI_ROOT/PR/$pr/GDASApp -o $GDAS_CI_ROOT/PR/$pr/output_${commit} ci_status=$? - git pr comment $pr --body-file $GDAS_CI_ROOT/PR/$pr/output_${commit} + gh pr comment $pr --body-file $GDAS_CI_ROOT/PR/$pr/output_${commit} if [ $ci_status -eq 0 ]; then gh pr edit $pr --remove-label $CI_LABEL fi From d54c8c687dda1622bd0698a2254b4c0808e1e153 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:11:35 -0500 Subject: [PATCH 06/11] Commit to save --- ush/HPC-CI/orion.sh | 2 +- ush/HPC-CI/run_ci.sh | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ush/HPC-CI/orion.sh b/ush/HPC-CI/orion.sh index b07ff5550..1084046ce 100644 --- a/ush/HPC-CI/orion.sh +++ b/ush/HPC-CI/orion.sh @@ -1,6 +1,6 @@ GDAS_CI_ROOT=/work2/noaa/stmp/cmartin/CI/GDASApp GDAS_CI_HOST='orion' -GDAS_MODULE_USE=$GDAS_CI_ROOT/repo/modulefiles +export GDAS_MODULE_USE=$GDAS_CI_ROOT/repo/modulefiles export SLURM_ACCOUNT=da-cpu export SALLOC_ACCOUNT=$SLURM_ACCOUNT export SBATCH_ACCOUNT=$SLURM_ACCOUNT diff --git a/ush/HPC-CI/run_ci.sh b/ush/HPC-CI/run_ci.sh index 40281aae5..5f13683d4 100755 --- a/ush/HPC-CI/run_ci.sh +++ b/ush/HPC-CI/run_ci.sh @@ -31,14 +31,15 @@ done # ============================================================================== # start output file -echo "Automated ${TARGET} Pull Request Testing Results:" > $outfile +echo "Automated Pull Request Testing Results:" > $outfile +echo "Machine: ${TARGET}" >> $outfile echo '```' >> $outfile echo "Start: $(date) on $(hostname)" >> $outfile # ============================================================================== # run build script cd $repodir module purge -./build.sh -t $TARGET +./build.sh -t $TARGET &>> log.build build_status=$? if [ $build_status -eq 0 ]; then echo "Build: *SUCCESS*" >> $outfile @@ -46,14 +47,18 @@ if [ $build_status -eq 0 ]; then else echo "Build: *FAILED*" >> $outfile echo "Build: Failed at $(date)" >> $outfile + echo "Build: see output at $repodir/log.build" >> $outfile echo '```' >> $outfile exit 1 fi # ============================================================================== # run ctests cd $repodir/build +module use $GDAS_MODULE_USE +module load GDAS/$TARGET echo "---------------------------------" >> $outfile ctest --output-on-failure &>> $outfile +echo "Completed at $(date)" >> $outfile ctest_status=$? echo '```' >> $outfile exit $ctest_status From 31350264bc19079ecca0b2906139b9fb73b1f168 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:13:38 -0500 Subject: [PATCH 07/11] move files to ci/ --- {ush/HPC-CI => ci}/driver.sh | 0 {ush/HPC-CI => ci}/orion.sh | 0 {ush/HPC-CI => ci}/run_ci.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {ush/HPC-CI => ci}/driver.sh (100%) rename {ush/HPC-CI => ci}/orion.sh (100%) rename {ush/HPC-CI => ci}/run_ci.sh (100%) diff --git a/ush/HPC-CI/driver.sh b/ci/driver.sh similarity index 100% rename from ush/HPC-CI/driver.sh rename to ci/driver.sh diff --git a/ush/HPC-CI/orion.sh b/ci/orion.sh similarity index 100% rename from ush/HPC-CI/orion.sh rename to ci/orion.sh diff --git a/ush/HPC-CI/run_ci.sh b/ci/run_ci.sh similarity index 100% rename from ush/HPC-CI/run_ci.sh rename to ci/run_ci.sh From 3189c35945bdc62e2679bd996fc1f9ad8451d393 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:14:28 -0500 Subject: [PATCH 08/11] Comment out repos for testing/debugging --- CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fdc3f2264..a34cb9663 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,23 +62,23 @@ if(BUILD_GDASBUNDLE) # Core JEDI repositories ecbuild_bundle( PROJECT oops GIT "https://github.com/jcsda/oops.git" BRANCH develop UPDATE ) - ecbuild_bundle( PROJECT saber GIT "https://github.com/jcsda/saber.git" BRANCH develop UPDATE ) + #ecbuild_bundle( PROJECT saber GIT "https://github.com/jcsda/saber.git" BRANCH develop UPDATE ) ecbuild_bundle( PROJECT ioda GIT "https://github.com/jcsda/ioda.git" BRANCH develop UPDATE ) - ecbuild_bundle( PROJECT ufo GIT "https://github.com/noaa-emc/ufo.git" BRANCH feature/ufsda UPDATE ) + #ecbuild_bundle( PROJECT ufo GIT "https://github.com/noaa-emc/ufo.git" BRANCH feature/ufsda UPDATE ) # FMS and FV3 dynamical core - ecbuild_bundle( PROJECT fms GIT "https://github.com/jcsda/FMS.git" BRANCH release-stable UPDATE ) - ecbuild_bundle( PROJECT fv3 GIT "https://github.com/jcsda/GFDL_atmos_cubed_sphere.git" BRANCH release-stable UPDATE ) + #ecbuild_bundle( PROJECT fms GIT "https://github.com/jcsda/FMS.git" BRANCH release-stable UPDATE ) + #ecbuild_bundle( PROJECT fv3 GIT "https://github.com/jcsda/GFDL_atmos_cubed_sphere.git" BRANCH release-stable UPDATE ) # fv3-jedi and associated repositories - ecbuild_bundle( PROJECT femps GIT "https://github.com/jcsda/femps.git" BRANCH develop UPDATE ) - ecbuild_bundle( PROJECT fv3-jedi-lm GIT "https://github.com/jcsda/fv3-jedi-linearmodel.git" BRANCH develop UPDATE ) - ecbuild_bundle( PROJECT fv3-jedi GIT "https://github.com/jcsda/fv3-jedi.git" BRANCH develop UPDATE ) + #ecbuild_bundle( PROJECT femps GIT "https://github.com/jcsda/femps.git" BRANCH develop UPDATE ) + #ecbuild_bundle( PROJECT fv3-jedi-lm GIT "https://github.com/jcsda/fv3-jedi-linearmodel.git" BRANCH develop UPDATE ) + #ecbuild_bundle( PROJECT fv3-jedi GIT "https://github.com/jcsda/fv3-jedi.git" BRANCH develop UPDATE ) # SOCA associated repositories - ecbuild_bundle( PROJECT gsw GIT "https://github.com/jcsda-internal/GSW-Fortran.git" UPDATE BRANCH develop ) - ecbuild_bundle( PROJECT mom6 GIT "https://github.com/jcsda-internal/MOM6.git" UPDATE BRANCH main-ecbuild RECURSIVE ) - ecbuild_bundle( PROJECT soca GIT "https://github.com/jcsda-internal/soca.git" UPDATE BRANCH develop ) + #ecbuild_bundle( PROJECT gsw GIT "https://github.com/jcsda-internal/GSW-Fortran.git" UPDATE BRANCH develop ) + #ecbuild_bundle( PROJECT mom6 GIT "https://github.com/jcsda-internal/MOM6.git" UPDATE BRANCH main-ecbuild RECURSIVE ) + #ecbuild_bundle( PROJECT soca GIT "https://github.com/jcsda-internal/soca.git" UPDATE BRANCH develop ) # Build IODA converters option(BUILD_IODA_CONVERTERS "Build IODA Converters" OFF) From 4bfc5c7f8b9e0184535c56e3f1c938a800dbf330 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:23:01 -0500 Subject: [PATCH 09/11] Remove CRTM --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a34cb9663..f2a29da1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ if(BUILD_GDASBUNDLE) ecbuild_bundle( PROJECT atlas GIT "https://github.com/ecmwf/atlas.git" TAG 0.24.1 ) # External (required) observation operators - ecbuild_bundle( PROJECT crtm GIT "https://github.com/jcsda/crtm.git" TAG v2.3-jedi.3 ) + #ecbuild_bundle( PROJECT crtm GIT "https://github.com/jcsda/crtm.git" TAG v2.3-jedi.3 ) # Core JEDI repositories ecbuild_bundle( PROJECT oops GIT "https://github.com/jcsda/oops.git" BRANCH develop UPDATE ) From 9f11f281700e569961d27e821974943e5da232be Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:40:41 -0500 Subject: [PATCH 10/11] Add scrubber and change of labels --- ci/driver.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ci/driver.sh b/ci/driver.sh index 1c14b72f4..cbd326960 100755 --- a/ci/driver.sh +++ b/ci/driver.sh @@ -59,6 +59,7 @@ open_pr_list=$(cat $GDAS_CI_ROOT/open_pr_list) repo_url="https://github.com/NOAA-EMC/GDASApp.git" # loop through all open PRs for pr in $open_pr_list; do + gh pr edit $pr --remove-label $CI_LABEL --add-label ${CI_LABEL}-Running echo "Processing Pull Request #${pr}" mkdir -p $GDAS_CI_ROOT/PR/$pr cd $GDAS_CI_ROOT/PR/$pr @@ -87,7 +88,13 @@ for pr in $open_pr_list; do ci_status=$? gh pr comment $pr --body-file $GDAS_CI_ROOT/PR/$pr/output_${commit} if [ $ci_status -eq 0 ]; then - gh pr edit $pr --remove-label $CI_LABEL + gh pr edit $pr --remove-label ${CI_LABEL}-Running --add-label ${CI_LABEL}-Passed + else + gh pr edit $pr --remove-label ${CI_LABEL}-Running --add-label ${CI_LABEL}-Failed fi done +# ============================================================================== +# scrub working directory for older files +find $GDAS_CI_ROOT/PR/* -mtime +3 -exec rm -rf {} \; + From 39dab947ad3c6d01892045708cc6954ee3662863 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Tue, 22 Mar 2022 14:52:19 -0500 Subject: [PATCH 11/11] Changes to the CI script --- ci/run_ci.sh | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/ci/run_ci.sh b/ci/run_ci.sh index 5f13683d4..84b0434c9 100755 --- a/ci/run_ci.sh +++ b/ci/run_ci.sh @@ -42,23 +42,33 @@ module purge ./build.sh -t $TARGET &>> log.build build_status=$? if [ $build_status -eq 0 ]; then - echo "Build: *SUCCESS*" >> $outfile + echo "Build: *SUCCESS*" >> $outfile echo "Build: Completed at $(date)" >> $outfile else - echo "Build: *FAILED*" >> $outfile + echo "Build: *FAILED*" >> $outfile echo "Build: Failed at $(date)" >> $outfile echo "Build: see output at $repodir/log.build" >> $outfile echo '```' >> $outfile - exit 1 + exit $build_status fi # ============================================================================== # run ctests cd $repodir/build module use $GDAS_MODULE_USE module load GDAS/$TARGET -echo "---------------------------------" >> $outfile -ctest --output-on-failure &>> $outfile -echo "Completed at $(date)" >> $outfile +echo "---------------------------------------------------" >> $outfile +ctest --output-on-failure &>> log.ctest ctest_status=$? +npassed=$(cat log.ctest | grep "tests passed") +if [ $ctest_status -eq 0 ]; then + echo "Tests: *SUCCESS*" >> $outfile + echo "Tests: Completed at $(date)" >> $outfile + echo "Tests: $npassed" >> $outfile +else + echo "Tests: *Failed*" >> $outfile + echo "Tests: Failed at $(date)" >> $outfile + echo "Tests: $npassed" >> $outfile + echo "Tests: see output at $repodir/build/log.ctest" >> $outfile +fi echo '```' >> $outfile exit $ctest_status