Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/exregional_make_ics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ case "${CCPP_PHYS_SUITE}" in
#
"FV3_GFS_2017_gfdlmp" | \
"FV3_GFS_2017_gfdlmp_regional" | \
"FV3_GFS_v16beta" | \
"FV3_GFS_v16" | \
"FV3_GFS_v15p2" | "FV3_CPT_v0" )
varmap_file="GFSphys_var_map.txt"
;;
Expand Down
2 changes: 1 addition & 1 deletion scripts/exregional_make_lbcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ case "${CCPP_PHYS_SUITE}" in
#
"FV3_GFS_2017_gfdlmp" | \
"FV3_GFS_2017_gfdlmp_regional" | \
"FV3_GFS_v16beta" | \
"FV3_GFS_v16" | \
"FV3_GFS_v15p2" | "FV3_CPT_v0" )
varmap_file="GFSphys_var_map.txt"
;;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="CONUS_25km_GFDLgrid"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="CONUS_3km_GFDLgrid"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_AK_13km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"

FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="6"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_AK_3km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"

FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="6"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_CONUS_13km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_CONUS_25km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_CONUS_3km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
44 changes: 44 additions & 0 deletions tests/baseline_configs/config.grid_RRFS_NA_3km.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
RUN_ENVIR="community"
PREEXISTING_DIR_METHOD="rename"

PREDEF_GRID_NAME="RRFS_NA_3km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_RRFS_v1alpha"

FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="6"

DATE_FIRST_CYCL="20190701"
DATE_LAST_CYCL="20190701"
CYCL_HRS=( "00" )

EXTRN_MDL_NAME_ICS="FV3GFS"
EXTRN_MDL_NAME_LBCS="FV3GFS"
USE_USER_STAGED_EXTRN_FILES="TRUE"

#########################################################################
# The following code/namelist/workflow setting changes are necessary to #
# run/optimize end-to-end experiments using the 3-km NA grid #
#########################################################################

# The model should be built in 32-bit mode (64-bit will result in much
# longer run times).

# Use k_split=2 and n_split=5; the previous namelist values (k_split=4
# and n_split=5) will result in significantly longer run times.

NNODES_MAKE_ICS="12"

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these settings specific to Hera? If we need to change these according to platform, we can put that kind of code in run_experiments.sh. That's where I've been doing platform-specific differentiation of settings.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeffBeck-NOAA Can better answer these questions since these changes came from him.

I think the problem is specific to the domain, which is very large and high-resolution, requiring more nodes for make_ics and make_lbcs for memory reasons. I assume that would mean this is required on all platforms.

I do think that these settings are less than ideal. Making PPN_MAKE_ICS and PPN_MAKE_LBCS lower means we will be under-utilizing the nodes.

We do need to have a separate conversation about making PPN_RUN_FCST (and maybe every PPN setting) platform-specific in defaults. Right now we are under-utilizing nodes on most platforms using default settings. Maybe this can be rolled into issue #452 since the OMP settings will have to be taken into account as well?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These settings are required for all platforms due to the domain size and resolution. The PPN_MAKE_ICS and PPN_MAKE_LBCS values are a specific chgres_cube requirement for large domains (need fewer processes but massive amounts of memory) due to an ESMF limitation/memory bug.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeffBeck-NOAA Thanks for the clarification. In that case should these be handled somewhere in the generate workflow calling tree in order to have these settings as a default for this domain specifically? Maybe that's something best handled in an issue and follow-up PR in the future?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mkavulich, yeah, this is where things can get really thorny. Do we want to fill the generate script with nested if/case statements for different domains, resolutions, platforms for changes to PPN for individual tasks? I wasn't sure if we wanted to go that route or just have users source the specific WE2E config.sh file when they want to run this domain? I think this definitely deserves an issue and potential follow-up PR.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's likely some variability between platforms, but as a first approximation, chgres_cube needs more nodes and fewer cores for memory management for this domain across all platforms. Since there are no NNODES_* or PPN_* settings currently defined in the set_predef_grid_params.sh script, I left the changes in the WE2E test. The test is designed for Hera, but could be run on any platform.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gsketefian I was able to run this test on Cheyenne as well (but the forecast step did not complete due to wallclock time, so I can't be sure if it would have fully run successfully).

I agree that k_split and n_split should likely be handled by domain, however, currently they are also set according to the physics scheme, which complicates things. I believe they should also be configurable via config.sh for maximum flexibility. But again, that complicates things.

I can take this change out of the current PR...more discussion definitely needs to happen, the question is do we keep this change for now or not.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mkavulich, if the NA 3-km WE2E test runs with the old k/n_split settings, then let's stick with them and deal with domain-, physics-, and platform-dependent settings in a later PR.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On AWS for the RRFS work, we run chgres on a single node that has 768 GB of memory. I wonder if there is a way to specify large memory nodes if available, but will leave that to you

# Resource overrides for the make_ics/make_lbcs tasks on this grid.
# NOTE(review): per the review discussion of this change, chgres_cube
# needs fewer processes per node but very large amounts of memory on
# this large, high-resolution domain, hence the low PPN_* values paired
# with a high node count -- confirm the exact values per platform.
NNODES_MAKE_LBCS="12"
PPN_MAKE_ICS="4"
PPN_MAKE_LBCS="4"
WTIME_MAKE_LBCS="01:00:00"

# Processes per node for the forecast task.
PPN_RUN_FCST="24"

# Node count and processes per node for the post-processing task.
NNODES_RUN_POST="6"
PPN_RUN_POST="12"

# Twice the "1024m" default set in ush/config_defaults.sh.
OMP_STACKSIZE_RUN_FCST="2048m"

###############################################################################
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_SUBCONUS_3km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"
FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="CONUS_25km_GFDLgrid"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"

FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="3"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREEXISTING_DIR_METHOD="rename"
PREDEF_GRID_NAME="RRFS_CONUS_25km"
QUILTING="TRUE"

CCPP_PHYS_SUITE="FV3_GFS_v16beta"
CCPP_PHYS_SUITE="FV3_GFS_v16"

FCST_LEN_HRS="06"
LBC_SPEC_INTVL_HRS="6"
Expand Down
13 changes: 7 additions & 6 deletions tests/baselines_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ grid_RRFS_AK_13km_RAP_RAP
grid_RRFS_AK_3km_FV3GFS_FV3GFS
grid_RRFS_AK_3km_RAP_RAP
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16beta
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
grid_RRFS_CONUS_13km_ics_HRRR_lbcs_RAP_suite_GSD_SAR
grid_RRFS_CONUS_13km_ics_HRRR_lbcs_RAP_suite_HRRR
grid_RRFS_CONUS_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16beta
grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GSD_SAR
grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_RAP_suite_HRRR
grid_RRFS_CONUS_25km_ics_HRRR_lbcs_HRRR_suite_GSD_SAR
Expand All @@ -43,17 +43,18 @@ grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_HRRR
grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_RRFS_v1beta
grid_RRFS_CONUS_25km_modify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16beta
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
grid_RRFS_CONUS_3km_ics_HRRR_lbcs_RAP_suite_GFS_v15p2
grid_RRFS_CONUS_3km_ics_HRRR_lbcs_RAP_suite_GSD_SAR
grid_RRFS_CONUS_3km_ics_HRRR_lbcs_RAP_suite_HRRR
grid_RRFS_CONUS_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_RRFS_NA_13km
Comment thread
gsketefian marked this conversation as resolved.
grid_RRFS_SUBCONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16beta
grid_RRFS_NA_3km
grid_RRFS_SUBCONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
grid_RRFS_SUBCONUS_3km_ics_HRRR_lbcs_RAP_suite_GFS_v15p2
grid_RRFS_SUBCONUS_3km_ics_HRRR_lbcs_RAP_suite_GSD_SAR
nco_ensemble
nco_grid_CONUS_25km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16beta
nco_grid_CONUS_25km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
nco_grid_RRFS_CONUS_25km_ics_HRRR_lbcs_RAP_suite_GSD_SAR
nco_grid_RRFS_CONUS_25km_ics_HRRR_lbcs_RAP_suite_HRRR
nco_grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_2017_gfdlmp_regional
Expand All @@ -72,7 +73,7 @@ subhourly_post
subhourly_post_ensemble_2mems
suite_FV3_CPT_v0
suite_FV3_GFS_v15p2
suite_FV3_GFS_v16beta
suite_FV3_GFS_v16
suite_FV3_GSD_SAR
suite_FV3_GSD_v0
suite_FV3_RRFS_v1beta
2 changes: 1 addition & 1 deletion ush/config_defaults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1641,7 +1641,7 @@ KMP_AFFINITY_RUN_FCST="scatter"
OMP_NUM_THREADS_RUN_FCST="4"
OMP_STACKSIZE_RUN_FCST="1024m"

CPUS_PER_TASK_RUN_FCST="4"
CPUS_PER_TASK_RUN_FCST="2"

KMP_AFFINITY_RUN_POST="scatter"
OMP_NUM_THREADS_RUN_POST="1"
Expand Down
42 changes: 42 additions & 0 deletions ush/set_predef_grid_params.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,48 @@ case ${PREDEF_GRID_NAME} in
fi
;;

#
#-----------------------------------------------------------------------
#
# Future operational RRFS domain with ~3km cell size.
#
# The grid-generation parameters below are assigned unconditionally.
# DT_ATMOS, LAYOUT_X, LAYOUT_Y and BLOCKSIZE are only given a default
# when they are unset or empty at this point, so values set earlier
# take precedence.  The write-component (WRTCMP_*) parameters are set
# only when QUILTING is "TRUE".
#
#-----------------------------------------------------------------------
#
"RRFS_NA_3km")

GRID_GEN_METHOD="ESGgrid"

# Center of the ESG grid.
# NOTE(review): presumably degrees of longitude/latitude -- confirm.
ESGgrid_LON_CTR=-107.5
ESGgrid_LAT_CTR=51.5

# Grid cell size in x and y.
# NOTE(review): presumably meters (matches the "~3km" description) --
# confirm.
ESGgrid_DELX="3000.0"
ESGgrid_DELY="3000.0"

# Number of grid cells in x and y.
ESGgrid_NX=3640
ESGgrid_NY=2520

ESGgrid_WIDE_HALO_WIDTH=6

# ${VAR:-default}: keep a value that is already set and non-empty,
# otherwise fall back to the grid-specific default given here.
DT_ATMOS="${DT_ATMOS:-36}"

LAYOUT_X="${LAYOUT_X:-18}"
LAYOUT_Y="${LAYOUT_Y:-36}"
BLOCKSIZE="${BLOCKSIZE:-28}"

# Write-component output grid.  The values are hard-coded; the trailing
# comments keep the expressions that would instead derive them from the
# ESGgrid_* settings.  Note that the hard-coded center (-112.0, 48.0)
# differs from the ESG grid center set above (-107.5, 51.5).
if [ "$QUILTING" = "TRUE" ]; then
WRTCMP_write_groups="1"
WRTCMP_write_tasks_per_group="144"
WRTCMP_output_grid="rotated_latlon"
WRTCMP_cen_lon="-112.0" #${ESGgrid_LON_CTR}"
WRTCMP_cen_lat="48.0" #${ESGgrid_LAT_CTR}"
WRTCMP_lon_lwr_left="-51.0"
WRTCMP_lat_lwr_left="-33.0"
WRTCMP_lon_upr_rght="51.0"
WRTCMP_lat_upr_rght="33.0"
WRTCMP_dlon="0.025" #$( printf "%.9f" $( bc -l <<< "(${ESGgrid_DELX}/${radius_Earth})*${degs_per_radian}" ) )
WRTCMP_dlat="0.025" #$( printf "%.9f" $( bc -l <<< "(${ESGgrid_DELY}/${radius_Earth})*${degs_per_radian}" ) )
fi
;;
esac
#
#-----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion ush/templates/FV3.input.yml
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ FV3_GFS_v15p2:
ldebug: False
surf_map_nml:

FV3_GFS_v16beta:
FV3_GFS_v16:
atmos_model_nml:
ccpp_suite: FV3_GFS_v16
fhmax: 240
Expand Down
2 changes: 1 addition & 1 deletion ush/templates/input.nml.FV3
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
hord_vt = 6
hydrostatic = .false.
io_layout = 1,1
k_split = 4
k_split = 2
ke_bg = 0.0
kord_mt = 9
kord_tm = -9
Expand Down
3 changes: 2 additions & 1 deletion ush/valid_param_vals.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ valid_vals_PREDEF_GRID_NAME=( \
"GSL_HAFSV0.A_3km" \
"GSD_HRRR_AK_50km" \
"RRFS_NA_13km" \
"RRFS_NA_3km" \
)
valid_vals_CCPP_PHYS_SUITE=( \
"FV3_CPT_v0" \
Expand All @@ -28,7 +29,7 @@ valid_vals_CCPP_PHYS_SUITE=( \
"FV3_GSD_SAR" \
"FV3_GSD_v0" \
"FV3_GFS_v15p2" \
"FV3_GFS_v16beta" \
"FV3_GFS_v16" \
"FV3_RRFS_v1beta" \
"FV3_RRFS_v1alpha" \
"FV3_HRRR" \
Expand Down