From 6732a076baa38378872f00b4796ada76ed3c6212 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Tue, 15 Jul 2014 14:53:06 -0400 Subject: [PATCH 1/3] Improve single thread performance --- src/parameterizations/vertical/MOM_KPP.F90 | 61 ++++++++++++++-------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/src/parameterizations/vertical/MOM_KPP.F90 b/src/parameterizations/vertical/MOM_KPP.F90 index 900ca82458..1426005bf6 100644 --- a/src/parameterizations/vertical/MOM_KPP.F90 +++ b/src/parameterizations/vertical/MOM_KPP.F90 @@ -340,6 +340,8 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K real, dimension( G%ke+1, 2) :: Kdiffusivity ! Vertical diffusivity at interfaces (m2/s) real, dimension( G%ke+1 ) :: Kviscosity ! Vertical viscosity at interfaces (m2/s) real, dimension( G%ke+1, 2) :: nonLocalTrans ! Non-local transport for heat/salt at interfaces (non-dimensional) + real, dimension( G%ke ) :: surfBuoyFlux2 + real, dimension( 3*G%ke ) :: rho_1D, pres_1D, Temp_1D, Salt_1D real :: kOBL, OBLdepth_0d, surfFricVel, surfBuoyFlux, Coriolis real :: GoRho, pRef, rho1, rhoK, rhoKm1, Uk, Vk, const1, Cv, sigma real :: zBottomMinusOffset ! Height of bottom plus a little bit (m) @@ -358,7 +360,7 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K real :: hTotV, surfHv, surfV ! Thickness, integral and average of V over the surface layer real :: hTotUm1, surfHum1, surfUm1 ! Same fo i-1,j or U real :: hTotVm1, surfHvm1, surfVm1 ! Same fo i,j-1 of V - + integer :: kk #ifdef __DO_SAFETY_CHECKS__ if (CS%debug) then call hchksum(h, "KPP in: h",G,haloshift=0) @@ -392,7 +394,8 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K !$OMP rho1,rhoK,rhoKm1,deltaRho,N2_1d,N_1d,delH, & !$OMP surfBuoyFlux,Ws_1d,Cv,Vt2_1d,BulkRi_1d, & !$OMP OBLdepth_0d,zBottomMinusOffset,Kdiffusivity, & -!$OMP Kviscosity,sigma,kOBL) +!$OMP Kviscosity,sigma,kOBL,kk,pres_1D,Temp_1D, & +!$OMP Salt_1D,rho_1D,surfBuoyFlux2) do j = G%jsc, G%jec do i = G%isc, G%iec if (G%mask2dT(i,j)==0.) cycle ! Skip calling KPP for land points @@ -417,7 +420,16 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K pRef = 0. do k = 1, G%ke km1 = max(1, k-1) - + kk = 3*(k-1) + pres_1D(kk+1) = pRef + pres_1D(kk+2) = pRef + pres_1D(kk+3) = pRef + Temp_1D(kk+1) = surfTemp + Temp_1D(kk+2) = Temp(i,j,k) + Temp_1D(kk+3) = Temp(i,j,km1) + Salt_1D(kk+1) = surfsalt + Salt_1D(kk+2) = Salt(i,j,k) + Salt_1D(kk+3) = Salt(i,j,km1) ! Compute heights, referenced to the surface (z=0) cellHeight(k) = iFaceHeight(k) - 0.5 * h(i,j,k) * G%H_to_m ! cell center in metres iFaceHeight(k+1) = iFaceHeight(k) - h(i,j,k) * G%H_to_m ! cell bottom in metres @@ -431,16 +443,7 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K Uk = 0.5 * ( abs( u(i,j,k) - surfU ) + abs( u(i-1,j,k) - surfUm1 ) ) ! delta_k U w/ C-grid average Vk = 0.5 * ( abs( v(i,j,k) - surfV ) + abs( v(i,j-1,k) - surfVm1 ) ) ! delta_k V w/ C-grid average deltaU2(k) = Uk**2 + Vk**2 - call calculate_density(surfTemp, surfSalt, pRef, rho1, EOS) - call calculate_density(Temp(i,j,k), Salt(i,j,k), pRef, rhoK, EOS) - call calculate_density(Temp(i,j,km1), Salt(i,j,km1), pRef, rhoKm1, EOS) - deltaRho(k) = rhoK - rho1 - ! N2 is on interfaces. - N2_1d(k) = ( GoRho * (rhoK - rhoKm1) ) / (0.5*(h(i,j,km1) + h(i,j,k))+G%H_subroundoff) ! Can be negative - ! N = sqrt(N^2) but because N^2 can be negative, we clip N^2 before taking the square root ?????? - N_1d(k) = sqrt( max( N2_1d(k), 0.) ) - ! N_1d(k) = sqrt( abs( N2_1d(k)) ) ! From MOM4p1 ????? - ! N_1d(k) = sign( sqrt( abs( N2_1d(k) ) ), N2_1d(k) ) ! Suggestion ???? + ! Pressure at bottom of level k will become pressure at top of level on next iteration pRef = pRef + G%g_Earth * G%Rho0 * h(i,j,k) * G%H_to_m ! Boussinesq approximation!!!! ????? @@ -472,23 +475,39 @@ subroutine KPP_calculate(CS, G, h, Temp, Salt, u, v, EOS, uStar, buoyFlux, Kt, K surfHvm1 = surfHvm1 + v(i,j-1,k) * delH ; surfVm1 = surfHvm1 / hTotVm1 endif enddo ! k + call calculate_density(Temp_1D, Salt_1D, pres_1D, rho_1D, 1, 3*G%ke, EOS) + do k = 1, G%ke + km1 = max(1, k-1) + kk = 3*(k-1) + deltaRho(k) = rho_1D(kk+2) - rho_1D(kk+1) + ! N2 is on interfaces. + N2_1d(k) = ( GoRho * (rho_1D(kk+2) - rho_1D(kk+3)) ) / (0.5*(h(i,j,km1) + h(i,j,k))+G%H_subroundoff) ! Can be negative + ! N = sqrt(N^2) but because N^2 can be negative, we clip N^2 before taking the square root ?????? + N_1d(k) = sqrt( max( N2_1d(k), 0.) ) + ! N_1d(k) = sqrt( abs( N2_1d(k)) ) ! From MOM4p1 ????? + ! N_1d(k) = sign( sqrt( abs( N2_1d(k) ) ), N2_1d(k) ) ! Suggestion ???? + enddo N2_1d( G%ke+1 ) = 0. N_1d( G%ke+1 ) = 0. + do k = 1, G%ke + surfBuoyFlux2(k) = buoyFlux(i,j,1) - buoyFlux(i,j,k+1) ! This difference accounts for penetrating of SW + enddo + + call CVmix_kpp_compute_turbulent_scales( & + eps, & ! (in) Normalized boundary layer depth; sigma = eps + -cellHeight, & ! (in) Guess that OBL depth (m) = -cellHeight(k) + surfBuoyFlux2, & ! (in) Buoyancy flux at surface (m2/s3) + surfFricVel, & ! (in) Turbulent friction velocity at surface (m/s) + w_s=Ws_1d, & ! (out) Turbulent velocity scale profile (m/s) + CVmix_kpp_params_user=CS%KPP_params ) ! KPP parameters + ! Estimate Ws in order to estimate Vt^2 (eq. 23 in LMD94) do k = 1, G%ke ! Note that if sigma > eps, then CVmix_kpp_compute_turbulent_scales ! computes w_s and w_m velocity scale at sigma=eps. So we only pass ! sigma=eps for this calculation. - surfBuoyFlux = buoyFlux(i,j,1) - buoyFlux(i,j,k+1) ! This difference accounts for penetrating of SW - call CVmix_kpp_compute_turbulent_scales( & - eps, & ! (in) Normalized boundary layer depth; sigma = eps - -cellHeight(k), & ! (in) Guess that OBL depth (m) = -cellHeight(k) - surfBuoyFlux, & ! (in) Buoyancy flux at surface (m2/s3) - surfFricVel, & ! (in) Turbulent friction velocity at surface (m/s) - w_s=Ws_1d(k), & ! (out) Turbulent velocity scale profile (m/s) - CVmix_kpp_params_user=CS%KPP_params ) ! KPP parameters ! Unresolved squared velocity, Vt^2, eq 23 from LMD94 Cv = max( 1.7, 2.1 - 200. * N_1d(k) ) ! Cv from eq A3 of Danabasoglu et al. 2006 From 8c502e3931e18de9c3aee28e3f3246ca89d939a1 Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Mon, 21 Jul 2014 14:13:32 -0400 Subject: [PATCH 2/3] fix to reproduce old results --- src/core/MOM_barotropic.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/MOM_barotropic.F90 b/src/core/MOM_barotropic.F90 index 52ed47f91e..adb3597596 100644 --- a/src/core/MOM_barotropic.F90 +++ b/src/core/MOM_barotropic.F90 @@ -729,7 +729,7 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & if (project_velocity) then trans_wt1 = (1.0 + be_proj); trans_wt2 = -be_proj else - trans_wt1 = (1.0-bebt); trans_wt2 = bebt + trans_wt1 = bebt ; trans_wt2 = (1.0-bebt) endif do_hifreq_output = .false. From 58230a302183cac7d9ecd947cdeb481785af42dc Mon Sep 17 00:00:00 2001 From: Zhi Liang Date: Tue, 22 Jul 2014 09:39:46 -0400 Subject: [PATCH 3/3] fix for OBC --- src/core/MOM_barotropic.F90 | 55 ++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/core/MOM_barotropic.F90 b/src/core/MOM_barotropic.F90 index adb3597596..831203a84e 100644 --- a/src/core/MOM_barotropic.F90 +++ b/src/core/MOM_barotropic.F90 @@ -1661,7 +1661,8 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & !$OMP use_BT_cont,BTCL_v,vhbt0,Datv,wt_vel,azon,bzon,czon, & !$OMP dzon,Cor_ref_u,gtot_E,gtot_W,u_accel_bt,bt_rem_u, & !$OMP BT_force_u,uhbt,BTCL_u,uhbt0,Datu,Cor_u,Cor_v, & -!$OMP PFu,PFv,ubt_trans,vbt_trans ) & +!$OMP PFu,PFv,ubt_trans,vbt_trans,apply_v_OBCs,BT_OBC, & +!$OMP vbt_prev,vhbt_prev,apply_u_OBCs,ubt_prev,uhbt_prev ) & !$OMP private(vel_prev) if (MOD(n+G%first_direction,2)==1) then ! On odd-steps, update v first. @@ -1699,7 +1700,12 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & vhbt(i,J) = Datv(i,J)*vbt_trans(i,J) + vhbt0(i,J) enddo ; enddo endif - + if(apply_v_OBCs) then ! copy back the value for the points that OBC_mask_v is true. +!$OMP do + do J=jsv-1,jev ; do i=isv-1,iev+1 ; if (BT_OBC%OBC_mask_v(i,J)) then + vbt(i,J) = vbt_prev(i,J); vhbt(i,J) = vhbt_prev(i,J) + endif ; enddo ; enddo + endif ! Now update the zonal velocity. !$OMP do do j=jsv,jev ; do I=isv-1,iev @@ -1736,7 +1742,12 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & uhbt(I,j) = Datu(I,j)*ubt_trans(I,j) + uhbt0(I,j) enddo ; enddo endif - + if(apply_u_OBCs) then ! copy back the value for the points that OBC_mask_v is true. +!$OMP do + do j=jsv,jev ; do I=isv-1,iev ; if (BT_OBC%OBC_mask_u(I,j)) then + ubt(I,j) = ubt_prev(I,j); uhbt(I,j) = uhbt_prev(I,j) + endif ; enddo ; enddo + endif else ! On even steps, update u first. !$OMP do @@ -1775,6 +1786,12 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & uhbt(I,j) = Datu(I,j)*ubt_trans(I,j) + uhbt0(I,j) enddo ; enddo endif + if(apply_u_OBCs) then ! copy back the value for the points that OBC_mask_v is true. +!$OMP do + do j=jsv-1,jev+1 ; do I=isv-1,iev ; if (BT_OBC%OBC_mask_u(I,j)) then + ubt(I,j) = ubt_prev(I,j); uhbt(I,j) = uhbt_prev(I,j) + endif ; enddo ; enddo + endif ! Now update the meridional velocity. !$OMP do @@ -1811,6 +1828,12 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & vhbt(i,J) = Datv(i,J)*vbt_trans(i,J) + vhbt0(i,J) enddo ; enddo endif + if(apply_v_OBCs) then ! copy back the value for the points that OBC_mask_v is true. +!$OMP do + do J=jsv-1,jev ; do i=isv,iev ; if (BT_OBC%OBC_mask_v(i,J)) then + vbt(i,J) = vbt_prev(i,J); vhbt(i,J) = vhbt_prev(i,J) + endif ; enddo ; enddo + endif endif !$OMP end parallel @@ -1857,41 +1880,23 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, & if (apply_OBCs) then if (apply_u_OBCs) then ! copy back the value for the points that OBC_mask_v is true. -!$OMP parallel default(none) shared(isv,iev,jsv,jev,ioff,joff,ubt_prev,ubt,uhbt_prev, & -!$OMP uhbt,is,ie,js,je,ubt_sum_prev,ubt_sum, & -!$OMP uhbt_sum_prev,uhbt_sum,ubt_wtd_prev,ubt_wtd,BT_OBC) -!$OMP do - do J=jsv-joff,jev+joff ; do i=isv-1,iev - if (BT_OBC%OBC_mask_u(i,J)) then - ubt(i,J) = ubt_prev(i,J); uhbt(i,J) = uhbt_prev(i,J) - endif - enddo ; enddo -!$OMP do +!$OMP parallel do default(none) shared(is,ie,js,je,ubt_sum_prev,ubt_sum,uhbt_sum_prev,& +!$OMP uhbt_sum,ubt_wtd_prev,ubt_wtd,BT_OBC) do j=js,je ; do I=is-1,ie if (BT_OBC%OBC_mask_u(i,J)) then ubt_sum(i,J)=ubt_sum_prev(i,J); uhbt_sum(i,J)=uhbt_sum_prev(i,J) ; ubt_wtd(i,J)=ubt_wtd_prev(i,J) endif enddo ; enddo -!$OMP end parallel endif if (apply_v_OBCs) then ! copy back the value for the points that OBC_mask_v is true. -!$OMP parallel default(none) shared(isv,iev,jsv,jev,ioff,joff,vbt_prev,vbt,vhbt_prev, & -!$OMP vhbt,is,ie,js,je,vbt_sum_prev,vbt_sum, & -!$OMP vhbt_sum_prev,vhbt_sum,vbt_wtd_prev,vbt_wtd,BT_OBC) -!$OMP do - do J=jsv-1,jev ; do i=isv-ioff,iev-ioff - if (BT_OBC%OBC_mask_v(i,J)) then - vbt(i,J) = vbt_prev(i,J); vhbt(i,J) = vhbt_prev(i,J) - endif - enddo ; enddo -!$OMP do +!$OMP parallel do default(none) shared(is,ie,js,je,vbt_sum_prev,vbt_sum,vhbt_sum_prev, & +!$OMP vhbt_sum,vbt_wtd_prev,vbt_wtd,BT_OBC) do J=js-1,je ; do I=is,ie if (BT_OBC%OBC_mask_v(i,J)) then vbt_sum(i,J)=vbt_sum_prev(i,J); vhbt_sum(i,J)=vhbt_sum_prev(i,J) ; vbt_wtd(i,J)=vbt_wtd_prev(i,J) endif enddo ; enddo -!$OMP end parallel endif call apply_velocity_OBCs(OBC, ubt, vbt, uhbt, vhbt, &