diff --git a/.cicd/Jenkinsfile b/.cicd/Jenkinsfile index 660423c3c3..f4e0294441 100644 --- a/.cicd/Jenkinsfile +++ b/.cicd/Jenkinsfile @@ -10,11 +10,11 @@ pipeline { parameters { // Allow job runner to filter based on platform // Use the line below to enable all PW clusters - // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use') + // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use') // Use the line below to enable the PW AWS cluster - // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use') - // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use') - choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use') + // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use') + // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use') + choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use') // Allow job runner to filter based on compiler choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 'Specify the compiler(s) to use to build') // 
Uncomment the following line to re-enable comprehensive tests @@ -77,8 +77,8 @@ pipeline { axes { axis { name 'SRW_PLATFORM' - // values 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules'//, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' - values 'gaea', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' + // values 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' + values 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' } axis { @@ -92,7 +92,7 @@ pipeline { exclude { axis { name 'SRW_PLATFORM' - values 'gaea', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1' + values 'gaea', 'gaea-c5', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1' } axis { diff --git a/.cicd/scripts/wrapper_srw_ftest.sh b/.cicd/scripts/wrapper_srw_ftest.sh index 14552d78e1..02ece830f9 100755 --- a/.cicd/scripts/wrapper_srw_ftest.sh +++ b/.cicd/scripts/wrapper_srw_ftest.sh @@ -28,6 +28,11 @@ if [[ "${SRW_PLATFORM}" == gaea ]]; then sed -i 's|qos=batch|qos=windfall|g' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh fi +if [[ "${SRW_PLATFORM}" == gaea-c5 ]]; then + sed -i '15i #SBATCH --clusters=c5' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh + sed -i 's|qos=batch|qos=normal|g' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh +fi + # Call job card and return job_id echo "Running: ${workflow_cmd} -A ${SRW_PROJECT} ${arg_1} ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh" job_id=$(${workflow_cmd} -A ${SRW_PROJECT} ${arg_1} ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh) diff --git a/devbuild.sh b/devbuild.sh index 6c703816ec..b21998fc8e 100755 --- a/devbuild.sh +++ b/devbuild.sh @@ -228,7 +228,7 @@ set -eu # automatically 
determine compiler if [ -z "${COMPILER}" ] ; then case ${PLATFORM} in - jet|hera|gaea) COMPILER=intel ;; + jet|hera|gaea|gaea-c5) COMPILER=intel ;; orion) COMPILER=intel ;; wcoss2) COMPILER=intel ;; cheyenne) COMPILER=intel ;; diff --git a/etc/lmod-setup.csh b/etc/lmod-setup.csh index 4be4659c87..dc8fb2b064 100644 --- a/etc/lmod-setup.csh +++ b/etc/lmod-setup.csh @@ -40,6 +40,9 @@ else if ( "$L_MACHINE" == singularity ) then else if ( "$L_MACHINE" == gaea ) then source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh +else if ( "$L_MACHINE" == gaea-c5 ) then + source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.csh + else if ( "$L_MACHINE" == derecho ) then module reset diff --git a/etc/lmod-setup.sh b/etc/lmod-setup.sh index 4c89d631d7..8a6b651958 100644 --- a/etc/lmod-setup.sh +++ b/etc/lmod-setup.sh @@ -47,6 +47,9 @@ elif [ "$L_MACHINE" = singularity ]; then elif [ "$L_MACHINE" = gaea ]; then source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh +elif [ "$L_MACHINE" = gaea-c5 ]; then + source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.sh + elif [ "$L_MACHINE" = derecho ]; then module reset diff --git a/modulefiles/build_gaea-c5_intel.lua b/modulefiles/build_gaea-c5_intel.lua new file mode 100644 index 0000000000..84d05290d7 --- /dev/null +++ b/modulefiles/build_gaea-c5_intel.lua @@ -0,0 +1,30 @@ +help([[ +This module loads libraries for building the UFS SRW App on +the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0 +]]) + +whatis([===[Loads libraries needed for building the UFS SRW App on Gaea C5 ]===]) + +load(pathJoin("cmake", os.getenv("cmake_ver") or "3.23.1")) + +prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/hpc-stack/intel-classic-2023.1.0/modulefiles/stack") +load(pathJoin("hpc", os.getenv("hpc_ver") or "1.2.0")) +load(pathJoin("intel-classic", os.getenv("intel_classic_ver") or "2023.1.0")) +load(pathJoin("cray-mpich", os.getenv("cray_mpich_ver") or "8.1.25")) +load(pathJoin("hpc-intel-classic", os.getenv("hpc_intel_classic_ver") or 
"2023.1.0")) +load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "8.1.25")) + +load("srw_common") + +unload("darshan-runtime/3.4.0") +setenv("CFLAGS","-diag-disable=10441") +setenv("FFLAGS","-diag-disable=10441") + +setenv("CC","cc") +setenv("FC","ftn") +setenv("CXX","CC") +setenv("CMAKE_C_COMPILER","cc") +setenv("CMAKE_Fortran_COMPILER","ftn") +setenv("CMAKE_CXX_COMPILER","CC") +setenv("CMAKE_Platform","gaea-c5.intel") + diff --git a/modulefiles/tasks/gaea-c5/plot_allvars.local.lua b/modulefiles/tasks/gaea-c5/plot_allvars.local.lua new file mode 100644 index 0000000000..7c2e3a9ba2 --- /dev/null +++ b/modulefiles/tasks/gaea-c5/plot_allvars.local.lua @@ -0,0 +1,4 @@ +prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles") +load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0")) + +setenv("SRW_ENV", "regional_workflow") diff --git a/modulefiles/tasks/gaea-c5/python_srw.lua b/modulefiles/tasks/gaea-c5/python_srw.lua new file mode 100644 index 0000000000..673aa800b8 --- /dev/null +++ b/modulefiles/tasks/gaea-c5/python_srw.lua @@ -0,0 +1,5 @@ +unload("miniconda3") +prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles") +load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0")) + +setenv("SRW_ENV", "workflow_tools") diff --git a/modulefiles/tasks/gaea-c5/run_vx.local.lua b/modulefiles/tasks/gaea-c5/run_vx.local.lua new file mode 100644 index 0000000000..5979a8db96 --- /dev/null +++ b/modulefiles/tasks/gaea-c5/run_vx.local.lua @@ -0,0 +1,6 @@ +--[[ +Compiler-specific modules are used for met and metplus libraries +--]] +load(pathJoin("met", os.getenv("met_ver") or "10.1.2")) +load(pathJoin("metplus", os.getenv("metplus_ver") or "4.1.3")) +load("python_srw") diff --git a/modulefiles/wflow_gaea-c5.lua b/modulefiles/wflow_gaea-c5.lua new file mode 100644 index 0000000000..12467dfd74 --- /dev/null +++ b/modulefiles/wflow_gaea-c5.lua @@ -0,0 +1,21 @@ +help([[ +This module 
loads python environment for running the UFS SRW App on +the NOAA RDHPC machine Gaea C5 +]]) + +whatis([===[Loads libraries needed for running the UFS SRW App on gaea ]===]) + +unload("python") +load("set_pythonpath") +prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles") +load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0")) +prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/rocoto/modulefiles") +load("rocoto") + +pushenv("MKLROOT", "/opt/intel/oneapi/mkl/2023.1.0/") + +if mode() == "load" then + LmodMsgRaw([===[Please do the following to activate conda: + > conda activate workflow_tools +]===]) +end diff --git a/tests/WE2E/machine_suites/coverage.gaea-c5 b/tests/WE2E/machine_suites/coverage.gaea-c5 new file mode 100644 index 0000000000..4ff7f61f3c --- /dev/null +++ b/tests/WE2E/machine_suites/coverage.gaea-c5 @@ -0,0 +1,11 @@ +community +custom_ESGgrid_NewZealand_3km +grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta +grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_RAP +grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR +grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km +grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR +grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta +grid_SUBCONUS_Ind_3km_ics_RAP_lbcs_RAP_suite_RRFS_v1beta_plot +nco_ensemble +nco_grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km diff --git a/tests/WE2E/setup_WE2E_tests.sh b/tests/WE2E/setup_WE2E_tests.sh index 309c755966..0bfe5c2a56 100755 --- a/tests/WE2E/setup_WE2E_tests.sh +++ b/tests/WE2E/setup_WE2E_tests.sh @@ -45,7 +45,7 @@ function usage { } -machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud ) +machines=( hera jet cheyenne derecho orion wcoss2 gaea gaea-c5 odin singularity macos noaacloud ) if [ "$1" = "-h" ] ; then usage ; fi [[ $# -le 2 ]] && usage diff --git a/tests/build.sh 
b/tests/build.sh index c6cbe89c1d..b1d7a00091 100755 --- a/tests/build.sh +++ b/tests/build.sh @@ -20,8 +20,8 @@ function usage() { echo exit 1 } - -machines=( hera jet cheyenne derecho orion hercules wcoss2 gaea odin singularity macos noaacloud ) + +machines=( hera jet cheyenne derecho orion hercules wcoss2 gaea gaea-c5 odin singularity macos noaacloud ) [[ $# -gt 4 ]] && usage diff --git a/ush/load_modules_run_task.sh b/ush/load_modules_run_task.sh index d3806b279c..24c8de80c3 100755 --- a/ush/load_modules_run_task.sh +++ b/ush/load_modules_run_task.sh @@ -188,6 +188,7 @@ module list if [ -n "${SRW_ENV:-}" ] ; then set +u + conda deactivate conda activate ${SRW_ENV} set -u fi diff --git a/ush/machine/gaea-c5.yaml b/ush/machine/gaea-c5.yaml new file mode 100644 index 0000000000..86653be7b5 --- /dev/null +++ b/ush/machine/gaea-c5.yaml @@ -0,0 +1,54 @@ +platform: + WORKFLOW_MANAGER: rocoto + NCORES_PER_NODE: 128 + SCHED: slurm + TEST_CCPA_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc + TEST_MRMS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc + TEST_NDAS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc + DOMAIN_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen + QUEUE_DEFAULT: normal + QUEUE_FCST: normal + QUEUE_HPSS: normal + REMOVE_MEMORY: True + PARTITION_HPSS: eslogin_c5 + RUN_CMD_FCST: srun --export=ALL -n ${PE_MEMBER01} + RUN_CMD_POST: srun --export=ALL -n $nprocs + RUN_CMD_PRDGEN: srun --export=ALL -n $nprocs + RUN_CMD_SERIAL: time + RUN_CMD_UTILS: srun --export=ALL -n $nprocs + SCHED_NATIVE_CMD: --clusters=c5 --partition=batch --export=NONE + SCHED_NATIVE_CMD_HPSS: --clusters=es --partition=eslogin_c5 --export=NONE + PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' + TEST_EXTRN_MDL_SOURCE_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data + TEST_PREGEN_BASEDIR: 
/lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen + TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir + TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir + TEST_VX_FCST_INPUT_BASEDIR: '{{ "/lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' + FIXaer: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_aer + FIXgsi: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_gsi + FIXgsm: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_am + FIXlut: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_lut + FIXorg: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_orog + FIXsfc: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo + FIXshp: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/NaturalEarth + EXTRN_MDL_DATA_STORES: aws +data: + ics_lbcs: + FV3GFS: + nemsio: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh} + grib2: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh} + netcdf: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/netcdf/${yyyymmdd}${hh} + RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh} + HRRR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh} + RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh} + GSMGFS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh} +rocoto: + tasks: + metatask_run_ensemble: + task_run_fcst_mem#mem#: + cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}' + native: '--cpus-per-task {{ 
task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}' + nodes: + nnodes: + nodesize: + ppn: diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 8f71e73d45..5acf9d5ce9 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -133,7 +133,7 @@ def download_file(url): # -c continue previous attempt # -T timeout seconds # -t number of tries - cmd = f"wget -q -c -T 10 -t 2 {url}" + cmd = f"wget -q -c -T 15 -t 2 {url}" logging.debug(f"Running command: \n {cmd}") try: subprocess.run( diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 385c061844..e73f57c2f7 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -4,7 +4,7 @@ valid_vals_RUN_ENVIR: ["nco", "community"] valid_vals_VERBOSE: [True, False] valid_vals_DEBUG: [True, False] -valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"] +valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA-C5"] valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"] valid_vals_FCST_MODEL: ["ufs-weather-model"] valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]