Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,18 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel")
# Replace -xHost or -xCORE-AVX2 with -xCORE-AVX-I for certain files (following FV3/gfsphysics/makefile)
# for bit-for-bit reproducibility with non-CCPP builds. These may go in the future once the CCPP solution
# is fully accepted.
set(CMAKE_Fortran_FLAGS_LOPT ${CMAKE_Fortran_FLAGS})
set(CMAKE_Fortran_FLAGS_LOPT1 ${CMAKE_Fortran_FLAGS})
string(REPLACE "-xHOST" "-xCORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT
"${CMAKE_Fortran_FLAGS_LOPT}")
CMAKE_Fortran_FLAGS_LOPT1
"${CMAKE_Fortran_FLAGS_LOPT1}")
string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT
"${CMAKE_Fortran_FLAGS_LOPT}")
CMAKE_Fortran_FLAGS_LOPT1
"${CMAKE_Fortran_FLAGS_LOPT1}")
string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT
"${CMAKE_Fortran_FLAGS_LOPT}")
CMAKE_Fortran_FLAGS_LOPT1
"${CMAKE_Fortran_FLAGS_LOPT1}")
SET_SOURCE_FILES_PROPERTIES(./physics/radiation_aerosols.f
PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT} -r8 -ftz")
PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT1} -r8 -ftz")
# Force consistent results of math calculations for MG microphysics;
# in Debug/Bitforbit) mode; without this flag, the results of the
# intrinsic gamma function are different for the non-CCPP and CCPP
Expand All @@ -182,16 +182,6 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel")
# For CCPP acceptance: selective reduction of optimization flags, hopefully
# to be removed once established that this is not a reasonable approach.
if (TRANSITION)
# Replace "CORE-AVX2" with "CORE-AVX-I"
set(CMAKE_Fortran_FLAGS_LOPT ${CMAKE_Fortran_FLAGS})
string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT
"${CMAKE_Fortran_FLAGS_LOPT}")
string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT
"${CMAKE_Fortran_FLAGS_LOPT}")
SET_SOURCE_FILES_PROPERTIES(./physics/sflx.f
PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT}")
# Replace "-no-prec-div -no-prec-sqrt" with "-prec-div -prec-sqrt",
# replace "CORE-AVX2" with "CORE-AVX-I"
set(CMAKE_Fortran_FLAGS_LOPT2 ${CMAKE_Fortran_FLAGS})
Expand All @@ -207,8 +197,20 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel")
string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT2
"${CMAKE_Fortran_FLAGS_LOPT2}")
SET_SOURCE_FILES_PROPERTIES(./physics/satmedmfvdif.F
SET_SOURCE_FILES_PROPERTIES(./physics/module_gfdl_cloud_microphys.F90
./physics/sflx.f
./physics/satmedmfvdif.F
PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT2}")
# Replace "CORE-AVX2" with "CORE-AVX-I"
set(CMAKE_Fortran_FLAGS_LOPT3 ${CMAKE_Fortran_FLAGS})
string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT3
"${CMAKE_Fortran_FLAGS_LOPT3}")
string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I"
CMAKE_Fortran_FLAGS_LOPT3
"${CMAKE_Fortran_FLAGS_LOPT3}")
SET_SOURCE_FILES_PROPERTIES(./physics/gfdl_fv_sat_adj.F90
PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT3}")
endif (TRANSITION)

else (PROJECT STREQUAL "CCPP-FV3")
Expand Down
117 changes: 67 additions & 50 deletions physics/gfdl_fv_sat_adj.F90
Original file line number Diff line number Diff line change
Expand Up @@ -255,54 +255,54 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je,
implicit none

! Interface variables
real(kind=kind_dyn), intent(in) :: mdt
real(kind=kind_dyn), intent(in) :: zvir
integer, intent(in) :: is
integer, intent(in) :: ie
integer, intent(in) :: isd
integer, intent(in) :: ied
integer, intent(in) :: kmp
integer, intent(in) :: km
integer, intent(in) :: kmdelz
integer, intent(in) :: js
integer, intent(in) :: je
integer, intent(in) :: jsd
integer, intent(in) :: jed
integer, intent(in) :: ng
logical, intent(in) :: hydrostatic
logical, intent(in) :: fast_mp_consv
real(kind=kind_dyn), intent(inout) :: te0_2d(is:ie, js:je)
real(kind=kind_dyn), intent( out) :: te0(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qv(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: ql(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qi(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qr(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qs(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qg(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(in) :: hs(isd:ied, jsd:jed)
real(kind=kind_dyn), intent(in) :: peln(is:ie, 1:km+1, js:je)
real(kind=kind_dyn), intent(in) :: mdt
real(kind=kind_dyn), intent(in) :: zvir
integer, intent(in) :: is
integer, intent(in) :: ie
integer, intent(in) :: isd
integer, intent(in) :: ied
integer, intent(in) :: kmp
integer, intent(in) :: km
integer, intent(in) :: kmdelz
integer, intent(in) :: js
integer, intent(in) :: je
integer, intent(in) :: jsd
integer, intent(in) :: jed
integer, intent(in) :: ng
logical, intent(in) :: hydrostatic
logical, intent(in) :: fast_mp_consv
real(kind=kind_dyn), intent(inout) :: te0_2d(is:ie, js:je)
real(kind=kind_dyn), intent( out) :: te0(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qv(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: ql(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qi(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qr(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qs(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: qg(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(in) :: hs(isd:ied, jsd:jed)
real(kind=kind_dyn), intent(in) :: peln(is:ie, 1:km+1, js:je)
! For hydrostatic build, kmdelz=1, otherwise kmdelz=km (see fv_arrays.F90)
real(kind=kind_dyn), intent(in) :: delz(isd:ied, jsd:jed, 1:kmdelz)
real(kind=kind_dyn), intent(in) :: delp(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: pt(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: pkz(is:ie, js:je, 1:km)
real(kind=kind_dyn), intent(in) :: delz(isd:ied, jsd:jed, 1:kmdelz)
real(kind=kind_dyn), intent(in) :: delp(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: pt(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: pkz(is:ie, js:je, 1:km)
#ifdef USE_COND
real(kind=kind_dyn), intent(inout) :: q_con(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: q_con(isd:ied, jsd:jed, 1:km)
#else
real(kind=kind_dyn), intent(inout) :: q_con(isd:isd, jsd:jsd, 1)
real(kind=kind_dyn), intent(inout) :: q_con(isd:isd, jsd:jsd, 1)
#endif
real(kind=kind_dyn), intent(in) :: akap
real(kind=kind_dyn), intent(in) :: akap
#ifdef MOIST_CAPPA
real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1:km)
real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1:km)
#else
real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1)
real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1)
#endif
! DH* WARNING, allocation in fv_arrays.F90 is area(isd_2d:ied_2d, jsd_2d:jed_2d),
! where normally isd_2d = isd etc, but for memory optimization, these can be set
! to isd_2d=0, ied_2d=-1 etc. I don't believe this optimization is actually used,
! as it would break a whole lot of code (including the one below)!
! Assume thus that isd_2d = isd etc.
real(kind_grid), intent(in) :: area(isd:ied, jsd:jed)
real(kind_grid), intent(in) :: area(isd:ied, jsd:jed)
real(kind=kind_dyn), intent(inout) :: dtdt(is:ie, js:je, 1:km)
logical, intent(in) :: out_dt
logical, intent(in) :: last_step
Expand All @@ -314,6 +314,10 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je,

! Local variables
real(kind=kind_dyn), dimension(is:ie,js:je) :: dpln
#ifdef TRANSITION
! For bit-for-bit reproducibility
real(kind=kind_dyn), volatile :: volatile_var
#endif
integer :: kdelz
integer :: k, j, i

Expand All @@ -331,6 +335,9 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je,
!$OMP ql,qv,te0,fast_mp_consv, &
!$OMP hydrostatic,ng,zvir,pkz, &
!$OMP akap,te0_2d) &
#ifdef TRANSITION
!$OMP private(volatile_var) &
#endif
!$OMP private(k,j,i,kdelz,dpln)
#endif

Expand All @@ -356,9 +363,19 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je,
do j=js,je
do i=is,ie
#ifdef MOIST_CAPPA
#ifdef TRANSITION
volatile_var = log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k))

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is like magic to me that you are able to narrow this down. Hopefully I can learn how you do this some day!

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed!

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks ... that took a few hours, line-by-line print statements ...

pkz(i,j,k) = exp(cappa(i,j,k)*volatile_var)
#else
pkz(i,j,k) = exp(cappa(i,j,k)*log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k)))
#endif
#else
#ifdef TRANSITION
volatile_var = log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k))
pkz(i,j,k) = exp(akap*volatile_var)
#else
pkz(i,j,k) = exp(akap*log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k)))
#endif
#endif
enddo
enddo
Expand Down Expand Up @@ -616,21 +633,21 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
! -----------------------------------------------------------------------
!> - Update latend heat coefficient.
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
lhl (i) = lv00 + d0_vap * pt1 (i)
lhi (i) = li00 + dc_ice * pt1 (i)
lcp2 (i) = lhl (i) / cvm (i)
icp2 (i) = lhi (i) / cvm (i)
tcp3 (i) = lcp2 (i) + icp2 (i) * min (1., dim (tice, pt1 (i)) / 48.)
enddo
if (last_step) then
if (last_step) then
! -----------------------------------------------------------------------
!> - condensation/evaporation between water vapor and cloud water, last time step
!! enforce upper (no super_sat) & lower (critical rh) bounds.
! final iteration:
! -----------------------------------------------------------------------
call wqs2_vect (is, ie, pt1, den, wqsat, dq2dt)
do i = is, ie
call wqs2_vect (is, ie, pt1, den, wqsat, dq2dt)
do i = is, ie
dq0 = (qv (i, j) - wqsat (i)) / (1. + tcp3 (i) * dq2dt (i))
if (dq0 > 0.) then ! remove super - saturation, prevent super saturation over water
src (i) = dq0
Expand All @@ -651,17 +668,17 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
! -----------------------------------------------------------------------
!> - Update latend heat coefficient.
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
lhl (i) = lv00 + d0_vap * pt1 (i)
lhi (i) = li00 + dc_ice * pt1 (i)
lcp2 (i) = lhl (i) / cvm (i)
icp2 (i) = lhi (i) / cvm (i)
enddo
endif
endif
! -----------------------------------------------------------------------
!> - Homogeneous freezing of cloud water to cloud ice.
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
dtmp = t_wfr - pt1 (i) ! [ - 40, - 48]
if (ql (i, j) > 0. .and. dtmp > 0.) then
sink (i) = min (ql (i, j), ql (i, j) * dtmp * 0.125, dtmp / icp2 (i))
Expand All @@ -676,14 +693,14 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
! -----------------------------------------------------------------------
!> - Update latend heat coefficient.
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
lhi (i) = li00 + dc_ice * pt1 (i)
icp2 (i) = lhi (i) / cvm (i)
enddo
! -----------------------------------------------------------------------
!> - bigg mechanism (heterogeneous freezing of cloud water to cloud ice).
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
tc = tice0 - pt1 (i)
if (ql (i, j) > 0.0 .and. tc > 0.) then
sink (i) = 3.3333e-10 * dt_bigg * (exp (0.66 * tc) - 1.) * den (i) * ql (i, j) ** 2
Expand All @@ -699,7 +716,7 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
! -----------------------------------------------------------------------
!> - Update latend heat coefficient.
! -----------------------------------------------------------------------
do i = is, ie
do i = is, ie
lhi (i) = li00 + dc_ice * pt1 (i)
icp2 (i) = lhi (i) / cvm (i)
enddo
Expand Down Expand Up @@ -866,11 +883,11 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
! -----------------------------------------------------------------------
!> - Compute cloud fraction.
! -----------------------------------------------------------------------
if (do_qa .and. last_step) then
if (do_qa .and. last_step) then
! -----------------------------------------------------------------------
!> - If it is the last step, combine water species.
! -----------------------------------------------------------------------
if (rad_snow) then
if (rad_snow) then
if (rad_graupel) then
do i = is, ie
q_sol (i) = qi (i, j) + qs (i, j) + qg (i, j)
Expand Down Expand Up @@ -977,8 +994,8 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te,
qa (i, j) = 0.
endif
enddo
endif
enddo ! end j loop
endif
enddo ! end j loop

end subroutine fv_sat_adj_work
!! @}
Expand Down
11 changes: 11 additions & 0 deletions physics/iccninterp.F90
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,17 @@ SUBROUTINE ciinterpol(me,npts,IDATE,FHOUR,jindx1,jindx2,ddy, &
ccnout(j,l)=ccnpm(j,1)
else
DO k=kcipl-1,1,-1
! DH* There is no backstop if this condition isn't met,
! i.e. i1 and i2 will have values determined by the
! previous code (line 178) - this leads to crashes in
! debug mode (out of bounds), for example for regression
! test fv3_stretched_nest_debug_moninq. For the time
! being, this is 'solved' by simply switching off ICCN
! if MG2/3 are not used (these are the only microphysics
! schemes that use the ICCN data); however, this doesn't
! mean that the code is correct for MG2/3, it just doesn't
! abort if the below condition isn't met, because the code
! is not tested in DEBUG mode. *DH
IF(prsl(j,l)>cipres(j,k)) then
i1=k
i2=min(k+1,kcipl)
Expand Down
17 changes: 2 additions & 15 deletions physics/module_gfdl_cloud_microphys.F90
Original file line number Diff line number Diff line change
Expand Up @@ -623,14 +623,7 @@ subroutine mpdrv (hydrostatic, uin, vin, w, delp, pt, qv, ql, qr, qi, qs, &
real, dimension (ktop:kbot) :: qv0, ql0, qr0, qi0, qs0, qg0, qa0
real, dimension (ktop:kbot) :: t0, den, den0, tz, p1, denfac
real, dimension (ktop:kbot) :: ccn, c_praut, m1_rain, m1_sol, m1
#if 0

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why u1 and v1 no longer need to be declared as volatile -- is it because you reduced optimization on the entire file?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right, reducing the optimization made this (and several other !DIR$ NOOPTIMIZE) unnecessary. Probably same performance hit by lowering xCORE-AVX2 to xCORE-AVX-I for the entire file as by using !DIR$ NOOPTIMIZE on selected routines. However, the !DIR$ NOOPTIMIZE worked for Intel 18 but didn't work for Intel 15.

!ifdef TRANSITION
! For bit-for-bit reproducibility
real, dimension (ktop:kbot) :: u0, v0, w1
real, dimension (ktop:kbot), volatile :: u1, v1
#else
real, dimension (ktop:kbot) :: u0, v0, u1, v1, w1
#endif

real :: cpaut, rh_adj, rh_rain
real :: r1, s1, i1, g1, rdt, ccn0
Expand Down Expand Up @@ -1393,10 +1386,7 @@ end subroutine revap_racc
! -----------------------------------------------------------------------

subroutine linear_prof (km, q, dm, z_var, h_var)
#if 0
!ifdef TRANSITION
!DIR$ NOOPTIMIZE
#endif

implicit none

integer, intent (in) :: km
Expand Down Expand Up @@ -1955,10 +1945,7 @@ end subroutine icloud

subroutine subgrid_z_proc (ktop, kbot, p1, den, denfac, dts, rh_adj, tz, qv, &
ql, qr, qi, qs, qg, qa, h_var, rh_rain)
#if 0
!ifdef TRANSITION
!DIR$ NOOPTIMIZE
#endif

implicit none

integer, intent (in) :: ktop, kbot
Expand Down
25 changes: 15 additions & 10 deletions physics/mp_thompson_hrrr_post.F90
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,22 @@ subroutine mp_thompson_hrrr_post_init(ncol, area, ttendlim, errmsg, errflg)
!dx = sqrt(area)

do i=1,ncol
!! DH* testing/development: limiters on temperature tendency
!! depending on the grid spacing - no limit for >50km *DH
!if (dx(i)<=5000) then
! mp_tend_lim(i) = 0.07 ! [K/s], 3-km HRRR value
!else if (dx(i)<=50000) then
! The column-dependent values that were set here previously
! are replaced with a single value set in the namelist
! input.nml.This value is independent of the grid spacing
! (as opposed to setting it here on a per-column basis).
! However, given that the timestep is the same for all grid
! columns and determined by the smallest grid spacing in
! the domain, it makes sense to use a single value.
!
! The values previously used in RAP/HRRR were
! mp_tend_lim(i) = 0.07 ! [K/s], 3-km HRRR value
! and
! mp_tend_lim(i) = 0.002 ! [K/s], 13-km RAP value
!else
! ! no limit for grid spacings >50km
! !mp_tend_lim(i) = 0.00006 ! [K/s], guess for >50km
! mp_tend_lim(i) = 1.0E+08
!end if
!
! Our testing with FV3 has shown thus far that 0.002 is
! too small for a 13km (C768) resolution and that 0.01
! works better. This is work in progress ...
mp_tend_lim(i) = ttendlim
end do

Expand Down