From aae49a096ec581b210bb367beb724df149978fd1 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Fri, 14 Nov 2025 16:24:03 -0700 Subject: [PATCH 1/2] fsurdat validation tool --- .../mksurfdata_esmf/validate_fsurdat_files.sh | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100755 tools/mksurfdata_esmf/validate_fsurdat_files.sh diff --git a/tools/mksurfdata_esmf/validate_fsurdat_files.sh b/tools/mksurfdata_esmf/validate_fsurdat_files.sh new file mode 100755 index 0000000000..b1e68ad233 --- /dev/null +++ b/tools/mksurfdata_esmf/validate_fsurdat_files.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# Validation/verification of new fsurdat files +# -------------------------------------------- +# WRITTEN by slevis after discussions with ekluzek. +# DATE of original script 2025/11/11. +# +# LOCATION: I first used this script in the directory +# .../inputdata/lnd/clm2/surfdata_esmf/ctsm5.4.0/validation +# +# CAVEAT: For new CTSM versions, use this script as a template with the +# understanding that aspects of the code will need to change. +# +# PURPOSE and DETAILS +# ------------------- +# This script +# 1) uses cprnc to compare ctsm5.4 versus ctsm5.3 files by generating +# cprnc.out files. +# 2) greps for fields with differences (RMS or NORMALIZED) that are >=1. +# The strictest grep threshold that I found isolates potentially +# unexpected changes is E-03. I determined this empirically using two +# types of problematic fsurdat files from the recent past: +# - No LAI, SAI, and heights for pfts 15 and 16. +# - No soil textures in parts of the world in unstructured grids. +# These "unexpected" fields appear alongside expected diffs (discussed +# below) when grepping for E+. Grepping for less than E-03 starts to +# capture fields with smaller differences and is likely to miss +# unexpected problematic fields. 
+# +# Step after running the script +# ----------------------------- +# Interactively and iteratively build this grep command to confirm that +# all fields in the script's grep output are expected. This list of +# fields is ctsm5.4-specific: +# >>> grep NORM grep_E+_surfdata_cprnc.out | grep -v ROOF | grep -v WALL | grep -v URBAN | grep -v BUILDING | grep -v abm | grep -v CANYON | grep -v CONST_HARVEST | grep -v ROAD | grep -v UNREPRESENTED_PFT | grep -v PCT_NATVEG +# +# If the grep command reveals unexpected fields, investigate. +# +# ----------------------------- +# +# Separate subjective comparison +# ------------------------------ +# >>> ncdiff surfdata_new.nc surfdata_old.nc surfdata_new_vs_old.nc +# >>> ncview surfdata_new_vs_old.nc +# Focus on fields with larger RMS diffs in the cprnc output. +# For ctsm5.4 I got approval for expected diffs from +# - Keith Oleson for diffs in urban fields +# - Peter Lawrence for diffs in const_harvest, unrep_pft, pct_natveg, pct_nat_pft +# - Fang Li (a few months ago) for diffs in abm. +# ncvis works like ncview for unstructured grids (e.g. ne30), though it +# seems to crash when I give it a "diff" file generated by ncdiff. +# +# Another validation step +# ----------------------- +# Run mksurfdata_esmf with a different number of processors and confirm +# bit-for-bit same results. + +echo "starting grids loop" + +# The first loop of grids has different ctsm5.3 date stamp. +# Skip ne3np4.pg2 as present only in ctsm5.4. 
+grids=("ne3np4") + +for grid in "${grids[@]}" + + do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_1850_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_1850_78pfts_c240925.nc >& surfdata_$grid\_hist_1850_78pfts_ctsm5.4v5.3.cprnc.out + /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_78pfts_c240925.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + echo "done $grid" + +done + +# Second loop of grids. +grids=("4x5" "10x15" "0.9x1.25" "1.9x2.5" "mpasa120" "mpasa480" "ne16np4.pg3" "ne120np4.pg3" "ne3np4.pg3" "ne30np4" "ne30np4.pg2" "ne30np4.pg3") +for grid in "${grids[@]}" + + do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_1850_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_1850_78pfts_c240908.nc >& surfdata_$grid\_hist_1850_78pfts_ctsm5.4v5.3.cprnc.out + /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_78pfts_c240908.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + echo "done $grid" + +done + +# Third loop of grids. +# Skip mpasa30 as present only in ctsm5.4. +# Skip 1850 as only 2000 is present. +grids=("mpasa3p75" "mpasa15" "mpasa60") +for grid in "${grids[@]}" + + do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_16pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_16pfts_c240908.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + echo "done $grid" + +done + +# grep for E+ to catch larger diffs. 
+for file in surfdata_*cprnc.out + do grep -H NORM $file | grep 'E+' >> grep_E+_surfdata_cprnc.out +done + +exit From eca1eb0a2b435c094b525f335b650e2eddae6bf0 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Tue, 18 Nov 2025 12:20:34 -0700 Subject: [PATCH 2/2] Revised validation tool based on Erik's code review --- .../mksurfdata_esmf/validate_fsurdat_files.sh | 74 +++++++++++++------ 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/tools/mksurfdata_esmf/validate_fsurdat_files.sh b/tools/mksurfdata_esmf/validate_fsurdat_files.sh index b1e68ad233..2158a30435 100755 --- a/tools/mksurfdata_esmf/validate_fsurdat_files.sh +++ b/tools/mksurfdata_esmf/validate_fsurdat_files.sh @@ -3,18 +3,18 @@ # Validation/verification of new fsurdat files # -------------------------------------------- # WRITTEN by slevis after discussions with ekluzek. -# DATE of original script 2025/11/11. # -# LOCATION: I first used this script in the directory +# LOCATION: slevis first used this script in the directory # .../inputdata/lnd/clm2/surfdata_esmf/ctsm5.4.0/validation # # CAVEAT: For new CTSM versions, use this script as a template with the -# understanding that aspects of the code will need to change. +# understanding that aspects of the code will need to change. Search for +# the string "current" to find items that may need to change in the future. # # PURPOSE and DETAILS # ------------------- # This script -# 1) uses cprnc to compare ctsm5.4 versus ctsm5.3 files by generating +# 1) uses cprnc to compare NEW_VERSION versus OLD_VERSION files by generating # cprnc.out files. # 2) greps for fields with differences (RMS or NORMALIZED) that are >=1. # The strictest grep threshold that I found isolates potentially @@ -31,10 +31,14 @@ # ----------------------------- # Interactively and iteratively build this grep command to confirm that # all fields in the script's grep output are expected. 
This list of -# fields is ctsm5.4-specific: +# fields here is ctsm5.4-specific: # >>> grep NORM grep_E+_surfdata_cprnc.out | grep -v ROOF | grep -v WALL | grep -v URBAN | grep -v BUILDING | grep -v abm | grep -v CANYON | grep -v CONST_HARVEST | grep -v ROAD | grep -v UNREPRESENTED_PFT | grep -v PCT_NATVEG # # If the grep command reveals unexpected fields, investigate. +# The list of fields to check depends on which fields you expect to have +# answer changes. The magnitude of the differences will depend on the +# specifics of what changed. Ensure you only see the answer changes that +# you expect. # # ----------------------------- # @@ -42,50 +46,74 @@ # ------------------------------ # >>> ncdiff surfdata_new.nc surfdata_old.nc surfdata_new_vs_old.nc # >>> ncview surfdata_new_vs_old.nc -# Focus on fields with larger RMS diffs in the cprnc output. -# For ctsm5.4 I got approval for expected diffs from -# - Keith Oleson for diffs in urban fields -# - Peter Lawrence for diffs in const_harvest, unrep_pft, pct_natveg, pct_nat_pft -# - Fang Li (a few months ago) for diffs in abm. -# ncvis works like ncview for unstructured grids (e.g. ne30), though it -# seems to crash when I give it a "diff" file generated by ncdiff. +# - Focus on fields with larger RMS diffs in the cprnc output. +# - ncvis works like ncview for unstructured grids (e.g. ne30), though +# slevis found ncvis to crash when reading a "diff" file generated by ncdiff. # # Another validation step # ----------------------- # Run mksurfdata_esmf with a different number of processors and confirm # bit-for-bit same results. +# Settings to be used in the comparisons below. +# Paths are hardwired to derecho currently. 
+newdatestamp=c251022 # USER DEFINED +newdir=ctsm5.4.0 # USER DEFINED +olddir=ctsm5.3.0 # USER DEFINED +olddatestamp=c240908 # USER DEFINED +olddatestamp_ne3np4=c240925 # USER DEFINED +cimetoolspath=/glade/campaign/cesm/cesmdata/cseg/tools/cime/tools +CPRNC=$cimetoolspath/cprnc/cprnc + echo "starting grids loop" -# The first loop of grids has different ctsm5.3 date stamp. -# Skip ne3np4.pg2 as present only in ctsm5.4. +# The first loop of grids (unlike the other loops) uses olddatestamp_ne3np4 currently. +# Skip ne3np4.pg2: it is currently present only in NEW_VERSION; consider adding it in future versions. grids=("ne3np4") for grid in "${grids[@]}" - do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_1850_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_1850_78pfts_c240925.nc >& surfdata_$grid\_hist_1850_78pfts_ctsm5.4v5.3.cprnc.out - /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_78pfts_c240925.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + # 1850_78pft files + do $CPRNC ../surfdata_$grid\_hist_1850_78pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_1850_78pfts_$olddatestamp_ne3np4.nc >& surfdata_$grid\_hist_1850_78pfts_$newdir\_vs_$olddir.cprnc.out + # 2000_78pft files + $CPRNC ../surfdata_$grid\_hist_2000_78pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_2000_78pfts_$olddatestamp_ne3np4.nc >& surfdata_$grid\_hist_2000_78pfts_$newdir\_vs_$olddir.cprnc.out echo "done $grid" done # Second loop of grids. 
-grids=("4x5" "10x15" "0.9x1.25" "1.9x2.5" "mpasa120" "mpasa480" "ne16np4.pg3" "ne120np4.pg3" "ne3np4.pg3" "ne30np4" "ne30np4.pg2" "ne30np4.pg3") +grids=("C96" "360x720cru" "4x5" "10x15" "0.9x1.25" "1.9x2.5" "mpasa120" "mpasa480" "ne16np4.pg3" "ne120np4.pg3" "ne3np4.pg3" "ne30np4" "ne30np4.pg2" "ne30np4.pg3") for grid in "${grids[@]}" - do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_1850_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_1850_78pfts_c240908.nc >& surfdata_$grid\_hist_1850_78pfts_ctsm5.4v5.3.cprnc.out - /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_78pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_78pfts_c240908.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + # 1850_78pft files + do $CPRNC ../surfdata_$grid\_hist_1850_78pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_1850_78pfts_$olddatestamp.nc >& surfdata_$grid\_hist_1850_78pfts_$newdir\_vs_$olddir.cprnc.out + # 2000_78pft files + $CPRNC ../surfdata_$grid\_hist_2000_78pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_2000_78pfts_$olddatestamp.nc >& surfdata_$grid\_hist_2000_78pfts_$newdir\_vs_$olddir.cprnc.out echo "done $grid" done # Third loop of grids. -# Skip mpasa30 as present only in ctsm5.4. -# Skip 1850 as only 2000 is present. -grids=("mpasa3p75" "mpasa15" "mpasa60") +# Skip 1850 because only 2000 files are present currently. +# Skip mpasa30: it is currently present only in NEW_VERSION; consider adding it in future versions. +# Skip mpasa3p75 because cprnc runs out of memory at that resolution currently. +grids=("mpasa60" "mpasa15") +for grid in "${grids[@]}" + + # 2000_16pft files + do $CPRNC ../surfdata_$grid\_hist_2000_16pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_2000_16pfts_$olddatestamp.nc >& surfdata_$grid\_hist_2000_16pfts_$newdir\_vs_$olddir.cprnc.out + echo "done $grid" + +done + +# Fourth loop of grids: for 1979 files. 
+# Skip ne0np4.NATL.ne30x8 and ne120np4.pg3: they are currently present only in NEW_VERSION; consider adding them in future versions. +grids=("ne0np4.ARCTICGRIS.ne30x8" "ne0np4.ARCTIC.ne30x4" "ne0np4CONUS.ne30x8" "ne0np4.POLARCAP.ne30x4") + for grid in "${grids[@]}" - do /glade/campaign/cesm/cesmdata/cseg/tools/cime/tools/cprnc/cprnc ../surfdata_$grid\_hist_2000_16pfts_c251022.nc ../../ctsm5.3.0/surfdata_$grid\_hist_2000_16pfts_c240908.nc >& surfdata_$grid\_hist_2000_78pfts_ctsm5.4v5.3.cprnc.out + # 1979_78pft files + do $CPRNC ../surfdata_$grid\_hist_1979_78pfts_$newdatestamp.nc ../../$olddir/surfdata_$grid\_hist_1979_78pfts_$olddatestamp.nc >& surfdata_$grid\_hist_1979_78pfts_$newdir\_vs_$olddir.cprnc.out echo "done $grid" done