diff --git a/CMakeLists.txt b/CMakeLists.txt index a3b310dd9..16864dde8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,18 +145,18 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel") # Replace -xHost or -xCORE-AVX2 with -xCORE-AVX-I for certain files (following FV3/gfsphysics/makefile) # for bit-for-bit reproducibility with non-CCPP builds. These may go in the future once the CCPP solution # is fully accepted. - set(CMAKE_Fortran_FLAGS_LOPT ${CMAKE_Fortran_FLAGS}) + set(CMAKE_Fortran_FLAGS_LOPT1 ${CMAKE_Fortran_FLAGS}) string(REPLACE "-xHOST" "-xCORE-AVX-I" - CMAKE_Fortran_FLAGS_LOPT - "${CMAKE_Fortran_FLAGS_LOPT}") + CMAKE_Fortran_FLAGS_LOPT1 + "${CMAKE_Fortran_FLAGS_LOPT1}") string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I" - CMAKE_Fortran_FLAGS_LOPT - "${CMAKE_Fortran_FLAGS_LOPT}") + CMAKE_Fortran_FLAGS_LOPT1 + "${CMAKE_Fortran_FLAGS_LOPT1}") string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I" - CMAKE_Fortran_FLAGS_LOPT - "${CMAKE_Fortran_FLAGS_LOPT}") + CMAKE_Fortran_FLAGS_LOPT1 + "${CMAKE_Fortran_FLAGS_LOPT1}") SET_SOURCE_FILES_PROPERTIES(./physics/radiation_aerosols.f - PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT} -r8 -ftz") + PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT1} -r8 -ftz") # Force consistent results of math calculations for MG microphysics; # in Debug/Bitforbit) mode; without this flag, the results of the # intrinsic gamma function are different for the non-CCPP and CCPP @@ -182,16 +182,6 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel") # For CCPP acceptance: selective reduction of optimization flags, hopefully # to be removed once established that this is not a reasonable approach. if (TRANSITION) - # Replace "CORE-AVX2" with "CORE-AVX-I" - set(CMAKE_Fortran_FLAGS_LOPT ${CMAKE_Fortran_FLAGS}) - string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I" - CMAKE_Fortran_FLAGS_LOPT - "${CMAKE_Fortran_FLAGS_LOPT}") - string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I" - CMAKE_Fortran_FLAGS_LOPT - "${CMAKE_Fortran_FLAGS_LOPT}") - SET_SOURCE_FILES_PROPERTIES(./physics/sflx.f - PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT}") # Replace "-no-prec-div -no-prec-sqrt" with "-prec-div -prec-sqrt", # replace "CORE-AVX2" with "CORE-AVX-I" set(CMAKE_Fortran_FLAGS_LOPT2 ${CMAKE_Fortran_FLAGS}) @@ -207,8 +197,20 @@ elseif (${CMAKE_Fortran_COMPILER_ID} STREQUAL "Intel") string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I" CMAKE_Fortran_FLAGS_LOPT2 "${CMAKE_Fortran_FLAGS_LOPT2}") - SET_SOURCE_FILES_PROPERTIES(./physics/satmedmfvdif.F + SET_SOURCE_FILES_PROPERTIES(./physics/module_gfdl_cloud_microphys.F90 + ./physics/sflx.f + ./physics/satmedmfvdif.F PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT2}") + # Replace "CORE-AVX2" with "CORE-AVX-I" + set(CMAKE_Fortran_FLAGS_LOPT3 ${CMAKE_Fortran_FLAGS}) + string(REPLACE "-xCORE-AVX2" "-xCORE-AVX-I" + CMAKE_Fortran_FLAGS_LOPT3 + "${CMAKE_Fortran_FLAGS_LOPT3}") + string(REPLACE "-axSSE4.2,AVX,CORE-AVX2" "-axSSE4.2,AVX,CORE-AVX-I" + CMAKE_Fortran_FLAGS_LOPT3 + "${CMAKE_Fortran_FLAGS_LOPT3}") + SET_SOURCE_FILES_PROPERTIES(./physics/gfdl_fv_sat_adj.F90 + PROPERTIES COMPILE_FLAGS "${CMAKE_Fortran_FLAGS_LOPT3}") endif (TRANSITION) else (PROJECT STREQUAL "CCPP-FV3") diff --git a/physics/gfdl_fv_sat_adj.F90 b/physics/gfdl_fv_sat_adj.F90 index 78c5c850d..6c1564477 100644 --- a/physics/gfdl_fv_sat_adj.F90 +++ b/physics/gfdl_fv_sat_adj.F90 @@ -255,54 +255,54 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je, implicit none ! Interface variables - real(kind=kind_dyn), intent(in) :: mdt - real(kind=kind_dyn), intent(in) :: zvir - integer, intent(in) :: is - integer, intent(in) :: ie - integer, intent(in) :: isd - integer, intent(in) :: ied - integer, intent(in) :: kmp - integer, intent(in) :: km - integer, intent(in) :: kmdelz - integer, intent(in) :: js - integer, intent(in) :: je - integer, intent(in) :: jsd - integer, intent(in) :: jed - integer, intent(in) :: ng - logical, intent(in) :: hydrostatic - logical, intent(in) :: fast_mp_consv - real(kind=kind_dyn), intent(inout) :: te0_2d(is:ie, js:je) - real(kind=kind_dyn), intent( out) :: te0(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: qv(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: ql(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: qi(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: qr(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: qs(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: qg(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(in) :: hs(isd:ied, jsd:jed) - real(kind=kind_dyn), intent(in) :: peln(is:ie, 1:km+1, js:je) + real(kind=kind_dyn), intent(in) :: mdt + real(kind=kind_dyn), intent(in) :: zvir + integer, intent(in) :: is + integer, intent(in) :: ie + integer, intent(in) :: isd + integer, intent(in) :: ied + integer, intent(in) :: kmp + integer, intent(in) :: km + integer, intent(in) :: kmdelz + integer, intent(in) :: js + integer, intent(in) :: je + integer, intent(in) :: jsd + integer, intent(in) :: jed + integer, intent(in) :: ng + logical, intent(in) :: hydrostatic + logical, intent(in) :: fast_mp_consv + real(kind=kind_dyn), intent(inout) :: te0_2d(is:ie, js:je) + real(kind=kind_dyn), intent( out) :: te0(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: qv(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: ql(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: qi(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: qr(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: qs(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: qg(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(in) :: hs(isd:ied, jsd:jed) + real(kind=kind_dyn), intent(in) :: peln(is:ie, 1:km+1, js:je) ! For hydrostatic build, kmdelz=1, otherwise kmdelz=km (see fv_arrays.F90) - real(kind=kind_dyn), intent(in) :: delz(isd:ied, jsd:jed, 1:kmdelz) - real(kind=kind_dyn), intent(in) :: delp(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: pt(isd:ied, jsd:jed, 1:km) - real(kind=kind_dyn), intent(inout) :: pkz(is:ie, js:je, 1:km) + real(kind=kind_dyn), intent(in) :: delz(isd:ied, jsd:jed, 1:kmdelz) + real(kind=kind_dyn), intent(in) :: delp(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: pt(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: pkz(is:ie, js:je, 1:km) #ifdef USE_COND - real(kind=kind_dyn), intent(inout) :: q_con(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: q_con(isd:ied, jsd:jed, 1:km) #else - real(kind=kind_dyn), intent(inout) :: q_con(isd:isd, jsd:jsd, 1) + real(kind=kind_dyn), intent(inout) :: q_con(isd:isd, jsd:jsd, 1) #endif - real(kind=kind_dyn), intent(in) :: akap + real(kind=kind_dyn), intent(in) :: akap #ifdef MOIST_CAPPA - real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1:km) + real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1:km) #else - real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1) + real(kind=kind_dyn), intent(inout) :: cappa(isd:ied, jsd:jed, 1) #endif ! DH* WARNING, allocation in fv_arrays.F90 is area(isd_2d:ied_2d, jsd_2d:jed_2d), ! where normally isd_2d = isd etc, but for memory optimization, these can be set ! to isd_2d=0, ied_2d=-1 etc. I don't believe this optimization is actually used, ! as it would break a whole lot of code (including the one below)! ! Assume thus that isd_2d = isd etc. - real(kind_grid), intent(in) :: area(isd:ied, jsd:jed) + real(kind_grid), intent(in) :: area(isd:ied, jsd:jed) real(kind=kind_dyn), intent(inout) :: dtdt(is:ie, js:je, 1:km) logical, intent(in) :: out_dt logical, intent(in) :: last_step @@ -314,6 +314,10 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je, ! Local variables real(kind=kind_dyn), dimension(is:ie,js:je) :: dpln +#ifdef TRANSITION + ! For bit-for-bit reproducibility + real(kind=kind_dyn), volatile :: volatile_var +#endif integer :: kdelz integer :: k, j, i @@ -331,6 +335,9 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je, !$OMP ql,qv,te0,fast_mp_consv, & !$OMP hydrostatic,ng,zvir,pkz, & !$OMP akap,te0_2d) & +#ifdef TRANSITION +!$OMP private(volatile_var) & +#endif !$OMP private(k,j,i,kdelz,dpln) #endif @@ -356,9 +363,19 @@ subroutine fv_sat_adj_run(mdt, zvir, is, ie, isd, ied, kmp, km, kmdelz, js, je, do j=js,je do i=is,ie #ifdef MOIST_CAPPA +#ifdef TRANSITION + volatile_var = log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k)) + pkz(i,j,k) = exp(cappa(i,j,k)*volatile_var) +#else pkz(i,j,k) = exp(cappa(i,j,k)*log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k))) +#endif +#else +#ifdef TRANSITION + volatile_var = log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k)) + pkz(i,j,k) = exp(akap*volatile_var) #else pkz(i,j,k) = exp(akap*log(rrg*delp(i,j,k)/delz(i,j,k)*pt(i,j,k))) +#endif #endif enddo enddo @@ -616,21 +633,21 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, ! ----------------------------------------------------------------------- !> - Update latend heat coefficient. ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie lhl (i) = lv00 + d0_vap * pt1 (i) lhi (i) = li00 + dc_ice * pt1 (i) lcp2 (i) = lhl (i) / cvm (i) icp2 (i) = lhi (i) / cvm (i) tcp3 (i) = lcp2 (i) + icp2 (i) * min (1., dim (tice, pt1 (i)) / 48.) enddo - if (last_step) then + if (last_step) then ! ----------------------------------------------------------------------- !> - condensation/evaporation between water vapor and cloud water, last time step !! enforce upper (no super_sat) & lower (critical rh) bounds. ! final iteration: ! ----------------------------------------------------------------------- - call wqs2_vect (is, ie, pt1, den, wqsat, dq2dt) - do i = is, ie + call wqs2_vect (is, ie, pt1, den, wqsat, dq2dt) + do i = is, ie dq0 = (qv (i, j) - wqsat (i)) / (1. + tcp3 (i) * dq2dt (i)) if (dq0 > 0.) then ! remove super - saturation, prevent super saturation over water src (i) = dq0 @@ -651,17 +668,17 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, ! ----------------------------------------------------------------------- !> - Update latend heat coefficient. ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie lhl (i) = lv00 + d0_vap * pt1 (i) lhi (i) = li00 + dc_ice * pt1 (i) lcp2 (i) = lhl (i) / cvm (i) icp2 (i) = lhi (i) / cvm (i) enddo - endif + endif ! ----------------------------------------------------------------------- !> - Homogeneous freezing of cloud water to cloud ice. ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie dtmp = t_wfr - pt1 (i) ! [ - 40, - 48] if (ql (i, j) > 0. .and. dtmp > 0.) then sink (i) = min (ql (i, j), ql (i, j) * dtmp * 0.125, dtmp / icp2 (i)) @@ -676,14 +693,14 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, ! ----------------------------------------------------------------------- !> - Update latend heat coefficient. ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie lhi (i) = li00 + dc_ice * pt1 (i) icp2 (i) = lhi (i) / cvm (i) enddo ! ----------------------------------------------------------------------- !> - bigg mechanism (heterogeneous freezing of cloud water to cloud ice). ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie tc = tice0 - pt1 (i) if (ql (i, j) > 0.0 .and. tc > 0.) then sink (i) = 3.3333e-10 * dt_bigg * (exp (0.66 * tc) - 1.) * den (i) * ql (i, j) ** 2 @@ -699,7 +716,7 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, ! ----------------------------------------------------------------------- !> - Update latend heat coefficient. ! ----------------------------------------------------------------------- - do i = is, ie + do i = is, ie lhi (i) = li00 + dc_ice * pt1 (i) icp2 (i) = lhi (i) / cvm (i) enddo @@ -866,11 +883,11 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, ! ----------------------------------------------------------------------- !> - Compute cloud fraction. ! ----------------------------------------------------------------------- - if (do_qa .and. last_step) then + if (do_qa .and. last_step) then ! ----------------------------------------------------------------------- !> - If it is the last step, combine water species. ! ----------------------------------------------------------------------- - if (rad_snow) then + if (rad_snow) then if (rad_graupel) then do i = is, ie q_sol (i) = qi (i, j) + qs (i, j) + qg (i, j) @@ -977,8 +994,8 @@ subroutine fv_sat_adj_work(mdt, zvir, is, ie, js, je, ng, hydrostatic, consv_te, qa (i, j) = 0. endif enddo - endif - enddo ! end j loop + endif + enddo ! end j loop end subroutine fv_sat_adj_work !! @} diff --git a/physics/iccninterp.F90 b/physics/iccninterp.F90 index e5740e6e5..c9df6f20c 100644 --- a/physics/iccninterp.F90 +++ b/physics/iccninterp.F90 @@ -219,6 +219,17 @@ SUBROUTINE ciinterpol(me,npts,IDATE,FHOUR,jindx1,jindx2,ddy, & ccnout(j,l)=ccnpm(j,1) else DO k=kcipl-1,1,-1 + ! DH* There is no backstop if this condition isn't met, + ! i.e. i1 and i2 will have values determined by the + ! previous code (line 178) - this leads to crashes in + ! debug mode (out of bounds), for example for regression + ! test fv3_stretched_nest_debug_moninq. For the time + ! being, this is 'solved' by simply switching off ICCN + ! if MG2/3 are not used (these are the only microphysics + ! schemes that use the ICCN data); however, this doesn't + ! mean that the code is correct for MG2/3, it just doesn't + ! abort if the below condition isn't met, because the code + ! is not tested in DEBUG mode. *DH IF(prsl(j,l)>cipres(j,k)) then i1=k i2=min(k+1,kcipl) diff --git a/physics/module_gfdl_cloud_microphys.F90 b/physics/module_gfdl_cloud_microphys.F90 index 3c61669e1..75bce087a 100644 --- a/physics/module_gfdl_cloud_microphys.F90 +++ b/physics/module_gfdl_cloud_microphys.F90 @@ -623,14 +623,7 @@ subroutine mpdrv (hydrostatic, uin, vin, w, delp, pt, qv, ql, qr, qi, qs, & real, dimension (ktop:kbot) :: qv0, ql0, qr0, qi0, qs0, qg0, qa0 real, dimension (ktop:kbot) :: t0, den, den0, tz, p1, denfac real, dimension (ktop:kbot) :: ccn, c_praut, m1_rain, m1_sol, m1 -#if 0 -!ifdef TRANSITION - ! For bit-for-bit reproducibility - real, dimension (ktop:kbot) :: u0, v0, w1 - real, dimension (ktop:kbot), volatile :: u1, v1 -#else real, dimension (ktop:kbot) :: u0, v0, u1, v1, w1 -#endif real :: cpaut, rh_adj, rh_rain real :: r1, s1, i1, g1, rdt, ccn0 @@ -1393,10 +1386,7 @@ end subroutine revap_racc ! ----------------------------------------------------------------------- subroutine linear_prof (km, q, dm, z_var, h_var) -#if 0 -!ifdef TRANSITION -!DIR$ NOOPTIMIZE -#endif + implicit none integer, intent (in) :: km @@ -1955,10 +1945,7 @@ end subroutine icloud subroutine subgrid_z_proc (ktop, kbot, p1, den, denfac, dts, rh_adj, tz, qv, & ql, qr, qi, qs, qg, qa, h_var, rh_rain) -#if 0 -!ifdef TRANSITION -!DIR$ NOOPTIMIZE -#endif + implicit none integer, intent (in) :: ktop, kbot diff --git a/physics/mp_thompson_hrrr_post.F90 b/physics/mp_thompson_hrrr_post.F90 index ba5698509..60876d8a3 100644 --- a/physics/mp_thompson_hrrr_post.F90 +++ b/physics/mp_thompson_hrrr_post.F90 @@ -64,17 +64,22 @@ subroutine mp_thompson_hrrr_post_init(ncol, area, ttendlim, errmsg, errflg) !dx = sqrt(area) do i=1,ncol - !! DH* testing/development: limiters on temperature tendency - !! depending on the grid spacing - no limit for >50km *DH - !if (dx(i)<=5000) then - ! mp_tend_lim(i) = 0.07 ! [K/s], 3-km HRRR value - !else if (dx(i)<=50000) then + ! The column-dependent values that were set here previously + ! are replaced with a single value set in the namelist + ! input.nml.This value is independent of the grid spacing + ! (as opposed to setting it here on a per-column basis). + ! However, given that the timestep is the same for all grid + ! columns and determined by the smallest grid spacing in + ! the domain, it makes sense to use a single value. + ! + ! The values previously used in RAP/HRRR were + ! mp_tend_lim(i) = 0.07 ! [K/s], 3-km HRRR value + ! and ! mp_tend_lim(i) = 0.002 ! [K/s], 13-km RAP value - !else - ! ! no limit for grid spacings >50km - ! !mp_tend_lim(i) = 0.00006 ! [K/s], guess for >50km - ! mp_tend_lim(i) = 1.0E+08 - !end if + ! + ! Our testing with FV3 has shown thus far that 0.002 is + ! too small for a 13km (C768) resolution and that 0.01 + ! works better. This is work in progress ... mp_tend_lim(i) = ttendlim end do