From 2ffea27befc617367b205d03a14d335f78250314 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Mon, 29 Mar 2021 16:50:41 -0400 Subject: [PATCH] MOM_hor_visc: Variables moved to stack New diagnostics to horizontal_viscosity were causing issues with stack memory on some platforms, causing the runtime to more than double. Two of the diagnostics were allocatables and the other two were local variables. By redefining the two allocatables as locals (and presumably moving to stack), the faster performance was restored. While the underlying cause is unclear, this is almost certainly due to stack spill in this function, which happens to have a large number of local arrays - including many 3d arrays used to gather diagnostics - and any new variable is going to have volatile consequences. This should be seen as a short term fix. In the future, we need better tools to detect this problem and better guidance on how to responsibly use stack. Also note that two variables were removed: `max_diss_rate_[qh]`. Neither variable was used in the function. --- .../lateral/MOM_hor_visc.F90 | 80 ++++++++++--------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/src/parameterizations/lateral/MOM_hor_visc.F90 b/src/parameterizations/lateral/MOM_hor_visc.F90 index 5bbc495e93..661fb715e7 100644 --- a/src/parameterizations/lateral/MOM_hor_visc.F90 +++ b/src/parameterizations/lateral/MOM_hor_visc.F90 @@ -277,10 +277,6 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, grad_d2vel_mag_h, & ! Magnitude of the Laplacian of the velocity vector, squared [L-2 T-2 ~> m-2 s-2] boundary_mask_h ! A mask that zeroes out cells with at least one land edge [nondim] - real, allocatable, dimension(:,:) :: hf_diffu_2d, hf_diffv_2d ! Depth sum of hf_diffu, hf_diffv [L T-2 ~> m s-2] - real, dimension(SZIB_(G),SZJ_(G)) :: intz_diffu_2d ! Depth-integral of diffu [L2 T-2 ~> m2 s-2] - real, dimension(SZI_(G),SZJB_(G)) :: intz_diffv_2d ! Depth-integral of diffv [L2 T-2 ~> m2 s-2] - real, dimension(SZIB_(G),SZJB_(G)) :: & dvdx, dudy, & ! components in the shearing strain [T-1 ~> s-1] dDel2vdx, dDel2udy, & ! Components in the biharmonic equivalent of the shearing strain [L-2 T-1 ~> m-2 s-1] @@ -309,7 +305,6 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, vort_xy_q, & ! vertical vorticity at corner points [T-1 ~> s-1] sh_xy_q, & ! horizontal shearing strain at corner points [T-1 ~> s-1] GME_coeff_q, & !< GME coeff. at q-points [L2 T-1 ~> m2 s-1] - max_diss_rate_q, & ! maximum possible energy dissipated by lateral friction [L2 T-3 ~> m2 s-3] ShSt ! A diagnostic array of shear stress [T-1 ~> s-1]. real, dimension(SZIB_(G),SZJ_(G),SZK_(GV)+1) :: & KH_u_GME !< interface height diffusivities in u-columns [L2 T-1 ~> m2 s-1] @@ -318,7 +313,6 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: & Ah_h, & ! biharmonic viscosity at thickness points [L4 T-1 ~> m4 s-1] Kh_h, & ! Laplacian viscosity at thickness points [L2 T-1 ~> m2 s-1] - max_diss_rate_h, & ! maximum possible energy dissipated by lateral friction [L2 T-3 ~> m2 s-3] FrictWork, & ! work done by MKE dissipation mechanisms [R L2 T-3 ~> W m-2] FrictWork_GME, & ! work done by GME [R L2 T-3 ~> W m-2] div_xx_h, & ! horizontal divergence [T-1 ~> s-1] @@ -389,6 +383,14 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, hrat_min, & ! h_min divided by the thickness at the stress point (h or q) [nondim] visc_bound_rem ! fraction of overall viscous bounds that remain to be applied [nondim] + real, dimension(SZIB_(G),SZJ_(G)) :: & + hf_diffu_2d, & ! Depth sum of hf_diffu, hf_diffv [L T-2 ~> m s-2] + intz_diffu_2d ! Depth-integral of diffu [L2 T-2 ~> m2 s-2] + + real, dimension(SZI_(G),SZJB_(G)) :: & + hf_diffv_2d, & ! Depth sum of hf_diffu, hf_diffv [L T-2 ~> m s-2] + intz_diffv_2d ! Depth-integral of diffv [L2 T-2 ~> m2 s-2] + is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = GV%ke Isq = G%IscB ; Ieq = G%IecB ; Jsq = G%JscB ; Jeq = G%JecB @@ -505,8 +507,7 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, !$OMP use_MEKE_Ku, use_MEKE_Au, boundary_mask_h, boundary_mask_q, & !$OMP backscat_subround, GME_coeff_limiter, & !$OMP h_neglect, h_neglect3, FWfrac, inv_PI3, inv_PI6, H0_GME, & - !$OMP diffu, diffv, max_diss_rate_h, max_diss_rate_q, & - !$OMP Kh_h, Kh_q, Ah_h, Ah_q, FrictWork, FrictWork_GME, & + !$OMP diffu, diffv, Kh_h, Kh_q, Ah_h, Ah_q, FrictWork, FrictWork_GME, & !$OMP div_xx_h, sh_xx_h, vort_xy_q, sh_xy_q, GME_coeff_h, GME_coeff_q, & !$OMP TD, KH_u_GME, KH_v_GME, grid_Re_Kh, grid_Re_Ah, NoSt, ShSt & !$OMP ) & @@ -1645,38 +1646,39 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, ! enddo ; enddo ; enddo ! call post_data(CS%id_hf_diffv, CS%hf_diffv, CS%diag) !endif - if (present(ADp) .and. (CS%id_hf_diffu_2d > 0)) then - allocate(hf_diffu_2d(G%IsdB:G%IedB,G%jsd:G%jed)) - hf_diffu_2d(:,:) = 0.0 - do k=1,nz ; do j=js,je ; do I=Isq,Ieq - hf_diffu_2d(I,j) = hf_diffu_2d(I,j) + diffu(I,j,k) * ADp%diag_hfrac_u(I,j,k) - enddo ; enddo ; enddo - call post_data(CS%id_hf_diffu_2d, hf_diffu_2d, CS%diag) - deallocate(hf_diffu_2d) - endif - if (present(ADp) .and. (CS%id_hf_diffv_2d > 0)) then - allocate(hf_diffv_2d(G%isd:G%ied,G%JsdB:G%JedB)) - hf_diffv_2d(:,:) = 0.0 - do k=1,nz ; do J=Jsq,Jeq ; do i=is,ie - hf_diffv_2d(i,J) = hf_diffv_2d(i,J) + diffv(i,J,k) * ADp%diag_hfrac_v(i,J,k) - enddo ; enddo ; enddo - call post_data(CS%id_hf_diffv_2d, hf_diffv_2d, CS%diag) - deallocate(hf_diffv_2d) - endif - if (present(ADp) .and. (CS%id_intz_diffu_2d > 0)) then - intz_diffu_2d(:,:) = 0.0 - do k=1,nz ; do j=js,je ; do I=Isq,Ieq - intz_diffu_2d(I,j) = intz_diffu_2d(I,j) + diffu(I,j,k) * ADp%diag_hu(I,j,k) - enddo ; enddo ; enddo - call post_data(CS%id_intz_diffu_2d, intz_diffu_2d, CS%diag) - endif - if (present(ADp) .and. (CS%id_intz_diffv_2d > 0)) then - intz_diffv_2d(:,:) = 0.0 - do k=1,nz ; do J=Jsq,Jeq ; do i=is,ie - intz_diffv_2d(i,J) = intz_diffv_2d(i,J) + diffv(i,J,k) * ADp%diag_hv(i,J,k) - enddo ; enddo ; enddo - call post_data(CS%id_intz_diffv_2d, intz_diffv_2d, CS%diag) + if (present(ADp)) then + if (CS%id_hf_diffu_2d > 0) then + hf_diffu_2d(:,:) = 0.0 + do k=1,nz ; do j=js,je ; do I=Isq,Ieq + hf_diffu_2d(I,j) = hf_diffu_2d(I,j) + diffu(I,j,k) * ADp%diag_hfrac_u(I,j,k) + enddo ; enddo ; enddo + call post_data(CS%id_hf_diffu_2d, hf_diffu_2d, CS%diag) + endif + + if (CS%id_hf_diffv_2d > 0) then + hf_diffv_2d(:,:) = 0.0 + do k=1,nz ; do J=Jsq,Jeq ; do i=is,ie + hf_diffv_2d(i,J) = hf_diffv_2d(i,J) + diffv(i,J,k) * ADp%diag_hfrac_v(i,J,k) + enddo ; enddo ; enddo + call post_data(CS%id_hf_diffv_2d, hf_diffv_2d, CS%diag) + endif + + if (CS%id_intz_diffu_2d > 0) then + intz_diffu_2d(:,:) = 0.0 + do k=1,nz ; do j=js,je ; do I=Isq,Ieq + intz_diffu_2d(I,j) = intz_diffu_2d(I,j) + diffu(I,j,k) * ADp%diag_hu(I,j,k) + enddo ; enddo ; enddo + call post_data(CS%id_intz_diffu_2d, intz_diffu_2d, CS%diag) + endif + + if (CS%id_intz_diffv_2d > 0) then + intz_diffv_2d(:,:) = 0.0 + do k=1,nz ; do J=Jsq,Jeq ; do i=is,ie + intz_diffv_2d(i,J) = intz_diffv_2d(i,J) + diffv(i,J,k) * ADp%diag_hv(i,J,k) + enddo ; enddo ; enddo + call post_data(CS%id_intz_diffv_2d, intz_diffv_2d, CS%diag) + endif endif end subroutine horizontal_viscosity