diff --git a/src/core/MOM.F90 b/src/core/MOM.F90 index b363da01cb..d07011057a 100644 --- a/src/core/MOM.F90 +++ b/src/core/MOM.F90 @@ -666,7 +666,7 @@ subroutine step_MOM(forces_in, fluxes_in, sfc_state, Time_start, time_int_in, CS call set_derived_forcing_fields(forces, fluxes, G, US, GV%Rho0) endif endif - !$omp target enter data map(to: forces, forces%taux, forces%tauy) + !$omp target enter data map(to: forces, forces%taux, forces%tauy, forces%ustar) ! This will be replaced later with the pressures from forces or fluxes if they are available. if (associated(CS%tv%p_surf)) CS%tv%p_surf(:,:) = 0.0 @@ -726,7 +726,7 @@ subroutine step_MOM(forces_in, fluxes_in, sfc_state, Time_start, time_int_in, CS if (nonblocking_p_surf_update) then call start_group_pass(pass_tau_ustar_psurf, G%Domain) else - call do_group_pass(pass_tau_ustar_psurf, G%Domain) + call do_group_pass(pass_tau_ustar_psurf, G%Domain, omp_offload=.true.) endif call cpu_clock_end(id_clock_pass) diff --git a/src/core/MOM_PressureForce_FV.F90 b/src/core/MOM_PressureForce_FV.F90 index 057d514b60..2d34d80e6e 100644 --- a/src/core/MOM_PressureForce_FV.F90 +++ b/src/core/MOM_PressureForce_FV.F90 @@ -1363,9 +1363,11 @@ subroutine PressureForce_FV_Bouss(h, tv, PFu, PFv, G, GV, US, CS, ALE_CSp, ADp, endif ! Set the pressure anomalies at the interfaces. - do k=1,nz - do concurrent (j=Jsq:Jeq+1, i=Isq:Ieq+1) - pa(i,j,K+1) = pa(i,j,K) + dpa(i,j,k) + do concurrent (j=Jsq:Jeq+1) + do k=1,nz + do concurrent (i=Isq:Ieq+1) + pa(i,j,K+1) = pa(i,j,K) + dpa(i,j,k) + enddo enddo enddo @@ -1565,32 +1567,24 @@ subroutine PressureForce_FV_Bouss(h, tv, PFu, PFv, G, GV, US, CS, ALE_CSp, ADp, ! assuming that the surface pressure anomaly varies linearly in x and y. ! If there is an ice-shelf or icebergs, this linear variation would need to be applied ! to an interior interface. - !$omp target - !$omp parallel loop collapse(2) - do j=js,je ; do I=Isq,Ieq + do concurrent (j=js:je, I=Isq:Ieq) intx_pa(I,j,1) = 0.5*(pa(i,j,1) + pa(i+1,j,1)) - enddo ; enddo - !$omp parallel loop collapse(2) - do J=Jsq,Jeq ; do i=is,ie + enddo + do concurrent (J=Jsq:Jeq, i=is:ie) inty_pa(i,J,1) = 0.5*(pa(i,j,1) + pa(i,j+1,1)) - enddo ; enddo - !$omp end target + enddo endif - !$omp target do k=1,nz - !$omp parallel loop collapse(2) - do j=js,je ; do I=Isq,Ieq + do concurrent (j=js:je, I=Isq:Ieq) intx_pa(I,j,K+1) = intx_pa(I,j,K) + intx_dpa(I,j,k) - enddo ; enddo + enddo enddo do k=1,nz - !$omp parallel loop collapse(2) - do J=Jsq,Jeq ; do i=is,ie + do concurrent (J=Jsq:Jeq, i=is:ie) inty_pa(i,J,K+1) = inty_pa(i,J,K) + inty_dpa(i,J,k) - enddo ; enddo + enddo enddo - !$omp end target if (CS%reset_intxpa_integral) then ! Having stored the pressure gradient info, we can work out where the first nonvanished layers is @@ -1930,13 +1924,11 @@ subroutine PressureForce_FV_Bouss(h, tv, PFu, PFv, G, GV, US, CS, ALE_CSp, ADp, enddo endif - do k=1,nz - do concurrent (j=js:je, I=Isq:Ieq) - PFu(I,j,k) = PFu(I,j,k) - (dM(i+1,j) - dM(i,j)) * G%IdxCu(I,j) - enddo - do concurrent (J=Jsq:Jeq, i=is:ie) - PFv(i,J,k) = PFv(i,J,k) - (dM(i,j+1) - dM(i,j)) * G%IdyCv(i,J) - enddo + do concurrent (k=1:nz, j=js:je, I=Isq:Ieq) + PFu(I,j,k) = PFu(I,j,k) - (dM(i+1,j) - dM(i,j)) * G%IdxCu(I,j) + enddo + do concurrent (k=1:nz, J=Jsq:Jeq, i=is:ie) + PFv(i,J,k) = PFv(i,J,k) - (dM(i,j+1) - dM(i,j)) * G%IdyCv(i,J) enddo !$omp end target data diff --git a/src/core/MOM_PressureForce_Montgomery.F90 b/src/core/MOM_PressureForce_Montgomery.F90 index 6e148ee642..74ae2b9af5 100644 --- a/src/core/MOM_PressureForce_Montgomery.F90 +++ b/src/core/MOM_PressureForce_Montgomery.F90 @@ -689,17 +689,19 @@ subroutine Set_pbce_Bouss(e, tv, G, GV, US, Rho0, GFS_scale, pbce, rho_star) if (use_EOS) then if (present(rho_star)) then - do concurrent (j=Jsq:Jeq+1, i=Isq:Ieq+1) + do concurrent (j=Jsq:Jeq+1) + do concurrent (i=Isq:Ieq+1) Ihtot(i,j) = GV%H_to_Z / ((e(i,j,1) - e(i,j,nz+1)) + dz_neglect) pbce(i,j,1) = GFS_scale * rho_star(i,j,1) * GV%H_to_Z enddo do k=2,nz - do concurrent (j=Jsq:Jeq+1, i=Isq:Ieq+1) + do concurrent (i=Isq:Ieq+1) pbce(i,j,k) = pbce(i,j,k-1) + (rho_star(i,j,k) - rho_star(i,j,k-1)) & * ((e(i,j,K) - e(i,j,nz+1)) * Ihtot(i,j)) enddo enddo + enddo else !$omp target data & !$omp map(alloc: EOSdom, press, T_int, S_int, rho_in_situ) & @@ -743,14 +745,16 @@ subroutine Set_pbce_Bouss(e, tv, G, GV, US, Rho0, GFS_scale, pbce, rho_star) !$omp end target data endif else - do concurrent (j=Jsq:Jeq+1, i=Isq:Ieq+1) - Ihtot(i,j) = 1.0 / ((e(i,j,1) - e(i,j,nz+1)) + dz_neglect) - pbce(i,j,1) = GV%g_prime(1) * GV%H_to_Z - enddo - do k=2,nz - do concurrent (j=Jsq:Jeq+1, i=Isq:Ieq+1) - pbce(i,j,k) = pbce(i,j,k-1) + (GV%g_prime(K) * GV%H_to_Z) & - * ((e(i,j,K) - e(i,j,nz+1)) * Ihtot(i,j)) + do concurrent (j=Jsq:Jeq+1) + do concurrent (i=Isq:Ieq+1) + Ihtot(i,j) = 1.0 / ((e(i,j,1) - e(i,j,nz+1)) + dz_neglect) + pbce(i,j,1) = GV%g_prime(1) * GV%H_to_Z + enddo + do k=2,nz + do concurrent (i=Isq:Ieq+1) + pbce(i,j,k) = pbce(i,j,k-1) + (GV%g_prime(K) * GV%H_to_Z) & + * ((e(i,j,K) - e(i,j,nz+1)) * Ihtot(i,j)) + enddo enddo enddo endif diff --git a/src/core/MOM_barotropic.F90 b/src/core/MOM_barotropic.F90 index 2448b2d051..10adc0a492 100644 --- a/src/core/MOM_barotropic.F90 +++ b/src/core/MOM_barotropic.F90 @@ -1068,27 +1068,39 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, do concurrent (J=js-1:je, i=is-1:ie+1) vbt_Cor(i,J) = 0.0 enddo - do k=1,nz ; do concurrent (j=js:je, I=is-1:ie) - ubt_Cor(I,j) = ubt_Cor(I,j) + wt_u(I,j,k) * U_Cor(I,j,k) - enddo ; enddo - do k=1,nz ; do concurrent (J=js-1:je, i=is:ie) - vbt_Cor(i,J) = vbt_Cor(i,J) + wt_v(i,J,k) * V_Cor(i,J,k) - enddo ; enddo + do concurrent (j=js:je) + do k=1,nz + do concurrent (I=is-1:ie) + ubt_Cor(I,j) = ubt_Cor(I,j) + wt_u(I,j,k) * U_Cor(I,j,k) + enddo + enddo + enddo + do concurrent (J=js-1:je) + do k=1,nz + do concurrent (i=is:ie) + vbt_Cor(i,J) = vbt_Cor(i,J) + wt_v(i,J,k) * V_Cor(i,J,k) + enddo + enddo + enddo ! The gtot arrays are the effective layer-weighted reduced gravities for ! accelerations across the various faces, with names for the relative ! locations of the faces to the pressure point. They will have their halos ! updated later on. - do k=1,nz - do concurrent (j=js:je, i=is-1:ie) - gtot_E(i,j) = gtot_E(i,j) + pbce(i,j,k) * wt_u(I,j,k) - gtot_W(i+1,j) = gtot_W(i+1,j) + pbce(i+1,j,k) * wt_u(I,j,k) + do concurrent (j=js:je) + do k=1,nz + do concurrent (i=is-1:ie) + gtot_E(i,j) = gtot_E(i,j) + pbce(i,j,k) * wt_u(I,j,k) + gtot_W(i+1,j) = gtot_W(i+1,j) + pbce(i+1,j,k) * wt_u(I,j,k) + enddo enddo enddo - do k=1,nz - do concurrent (J=js-1:je, i=is:ie) - gtot_N(i,j) = gtot_N(i,j) + pbce(i,j,k) * wt_v(i,J,k) - gtot_S(i,j+1) = gtot_S(i,j+1) + pbce(i,j+1,k) * wt_v(i,J,k) + do concurrent (J=js-1:je) + do k=1,nz + do concurrent (i=is:ie) + gtot_N(i,j) = gtot_N(i,j) + pbce(i,j,k) * wt_v(i,J,k) + gtot_S(i,j+1) = gtot_S(i,j+1) + pbce(i,j+1,k) * wt_v(i,J,k) + enddo enddo enddo @@ -1172,14 +1184,22 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, vbt(i,J) = vbt(i,J) + wt_v(i,J,k) * v_vh0(i,J,k) enddo ; enddo else - do k=1,nz ; do concurrent (j=js:je, I=is-1:ie) - uhbt(I,j) = uhbt(I,j) + uh0(I,j,k) - ubt(I,j) = ubt(I,j) + CS%frhatu(I,j,k) * u_uh0(I,j,k) - enddo ; enddo - do k=1,nz ; do concurrent (J=js-1:je, i=is:ie) - vhbt(i,J) = vhbt(i,J) + vh0(i,J,k) - vbt(i,J) = vbt(i,J) + CS%frhatv(i,J,k) * v_vh0(i,J,k) - enddo ; enddo + do concurrent (j=js:je) + do k=1,nz + do concurrent (I=is-1:ie) + uhbt(I,j) = uhbt(I,j) + uh0(I,j,k) + ubt(I,j) = ubt(I,j) + CS%frhatu(I,j,k) * u_uh0(I,j,k) + enddo + enddo + enddo + do concurrent (J=js-1:je) + do k=1,nz + do concurrent (i=is:ie) + vhbt(i,J) = vhbt(i,J) + vh0(i,J,k) + vbt(i,J) = vbt(i,J) + CS%frhatv(i,J,k) * v_vh0(i,J,k) + enddo + enddo + enddo endif if ((use_BT_cont .or. integral_BT_cont) .and. CS%adjust_BT_cont) then ! Use the additional input transports to broaden the fits @@ -1324,12 +1344,20 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, ! bc_accel_u & bc_accel_v are only available on the potentially ! non-symmetric computational domain. - do k=1,nz ; do concurrent (j=js:je, I=Isq:Ieq) - BT_force_u(I,j) = BT_force_u(I,j) + wt_u(I,j,k) * bc_accel_u(I,j,k) - enddo ; enddo - do k=1,nz ; do concurrent (J=Jsq:Jeq, i=is:ie) - BT_force_v(i,J) = BT_force_v(i,J) + wt_v(i,J,k) * bc_accel_v(i,J,k) - enddo ; enddo + do concurrent (j=js:je) + do k=1,nz + do concurrent (I=Isq:Ieq) + BT_force_u(I,j) = BT_force_u(I,j) + wt_u(I,j,k) * bc_accel_u(I,j,k) + enddo + enddo + enddo + do concurrent (J=Jsq:Jeq) + do k=1,nz + do concurrent (i=is:ie) + BT_force_v(i,J) = BT_force_v(i,J) + wt_v(i,J,k) * bc_accel_v(i,J,k) + enddo + enddo + enddo if (CS%gradual_BT_ICs) then do concurrent (j=js:je, I=is-1:ie) @@ -1479,15 +1507,23 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, do concurrent (j=js-1:je+1, I=is-1:ie) av_rem_u(I,j) = 0.0 enddo - do concurrent (J=js-1:je, i=is-1:ie+1) - av_rem_v(i,J) = 0.0 + do concurrent (j=js:je) + do k=1,nz + do concurrent (I=is-1:ie) + av_rem_u(I,j) = av_rem_u(I,j) + CS%frhatu(I,j,k) * visc_rem_u(I,j,k) + enddo + enddo + enddo + do concurrent (J=js-1:je) + do concurrent(i=is-1:ie+1) + av_rem_v(i,J) = 0.0 + enddo + do k=1,nz + do concurrent (i=is:ie) + av_rem_v(i,J) = av_rem_v(i,J) + CS%frhatv(i,J,k) * visc_rem_v(i,J,k) + enddo + enddo enddo - do k=1,nz ; do concurrent (j=js:je, I=is-1:ie) - av_rem_u(I,j) = av_rem_u(I,j) + CS%frhatu(I,j,k) * visc_rem_u(I,j,k) - enddo ; enddo - do k=1,nz ; do concurrent (J=js-1:je, i=is:ie) - av_rem_v(i,J) = av_rem_v(i,J) + CS%frhatv(i,J,k) * visc_rem_v(i,J,k) - enddo ; enddo if (CS%strong_drag) then do concurrent (j=js:je, I=is-1:ie) bt_rem_u(I,j) = G%mask2dCu(I,j) * & @@ -1815,6 +1851,7 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, else I_sum_wt_vel = 1.0 ; I_sum_wt_eta = 1.0 ; I_sum_wt_accel = 1.0 ; I_sum_wt_trans = 1.0 endif + !$omp target enter data map(to: wt_vel, wt_eta, wt_accel, wt_trans, wt_accel2) ! March the barotropic solver through all of its time steps. call btstep_timeloop(eta, ubt, vbt, uhbt0, Datu, BTCL_u, vhbt0, Datv, BTCL_v, eta_IC, & @@ -1826,6 +1863,8 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce, LDv_avg, use_BT_cont, interp_eta_PF, find_etaav, dt, dtbt, nstep, nfilter, & wt_vel, wt_eta, wt_accel, wt_trans, wt_accel2, ADp, CS%BT_OBC, CS, G, MS, GV, US) + !$omp target exit data map(release: wt_vel, wt_eta, wt_accel, wt_trans, wt_accel2) + if (id_clock_calc > 0) call cpu_clock_end(id_clock_calc) if (id_clock_calc_post > 0) call cpu_clock_begin(id_clock_calc_post) @@ -3428,18 +3467,25 @@ subroutine btstep_ubt_from_layer(U_in, V_in, wt_u, wt_v, ubt, vbt, G, GV, CS) vbt(i,j) = 0.0 enddo - do k=1,nz ; do concurrent (j=js:je, I=is-1:ie) - ubt(I,j) = ubt(I,j) + wt_u(I,j,k) * U_in(I,j,k) - enddo ; enddo - do k=1,nz ; do concurrent (J=js-1:je, i=is:ie) - vbt(i,J) = vbt(i,J) + wt_v(i,J,k) * V_in(i,J,k) - enddo ; enddo - - do concurrent (j=js:je, I=is-1:ie) - if (abs(ubt(I,j)) < CS%vel_underflow) ubt(I,j) = 0.0 + do concurrent (j=js:je) + do k=1,nz + do concurrent (I=is-1:ie) + ubt(I,j) = ubt(I,j) + wt_u(I,j,k) * U_in(I,j,k) + enddo + enddo + do concurrent (I=is-1:ie) + if (abs(ubt(I,j)) < CS%vel_underflow) ubt(I,j) = 0.0 + enddo enddo - do concurrent (J=js-1:je, i=is:ie) - if (abs(vbt(i,J)) < CS%vel_underflow) vbt(i,J) = 0.0 + do concurrent (J=js-1:je) + do k=1,nz + do concurrent (i=is:ie) + vbt(i,J) = vbt(i,J) + wt_v(i,J,k) * V_in(i,J,k) + enddo + enddo + do concurrent (i=is:ie) + if (abs(vbt(i,J)) < CS%vel_underflow) vbt(i,J) = 0.0 + enddo enddo end subroutine btstep_ubt_from_layer @@ -3604,16 +3650,20 @@ subroutine set_dtbt(G, GV, US, CS, pbce, gtot_est, BT_cont, eta, SSH_add) dgeo_de = 1.0 + max(0.0, CS%G_extra - det_de) endif if (present(pbce)) then - do concurrent (j=js:je, i=is:ie) - gtot_E(i,j) = 0.0 ; gtot_W(i,j) = 0.0 - gtot_N(i,j) = 0.0 ; gtot_S(i,j) = 0.0 + do concurrent (j=js:je) + do concurrent (i=is:ie) + gtot_E(i,j) = 0.0 ; gtot_W(i,j) = 0.0 + gtot_N(i,j) = 0.0 ; gtot_S(i,j) = 0.0 + enddo + do k=1,nz + do concurrent (i=is:ie) + gtot_E(i,j) = gtot_E(i,j) + pbce(i,j,k) * CS%frhatu(I,j,k) + gtot_W(i,j) = gtot_W(i,j) + pbce(i,j,k) * CS%frhatu(I-1,j,k) + gtot_N(i,j) = gtot_N(i,j) + pbce(i,j,k) * CS%frhatv(i,J,k) + gtot_S(i,j) = gtot_S(i,j) + pbce(i,j,k) * CS%frhatv(i,J-1,k) + enddo + enddo enddo - do k=1,nz ; do concurrent (j=js:je, i=is:ie) - gtot_E(i,j) = gtot_E(i,j) + pbce(i,j,k) * CS%frhatu(I,j,k) - gtot_W(i,j) = gtot_W(i,j) + pbce(i,j,k) * CS%frhatu(I-1,j,k) - gtot_N(i,j) = gtot_N(i,j) + pbce(i,j,k) * CS%frhatv(i,J,k) - gtot_S(i,j) = gtot_S(i,j) + pbce(i,j,k) * CS%frhatv(i,J-1,k) - enddo ; enddo else do concurrent (j=js:je, i=is:ie) gtot_E(i,j) = gtot_est ; gtot_W(i,j) = gtot_est @@ -5346,31 +5396,33 @@ subroutine bt_mass_source(h, eta, set_cor, G, GV, CS) !$omp target data map(alloc: eta_h) - if (GV%Boussinesq) then - do concurrent (j=js:je, i=is:ie) - eta_h(i,j) = h(i,j,1) - G%bathyT(i,j)*GV%Z_to_H - enddo - else - do concurrent (j=js:je, i=is:ie) - eta_h(i,j) = h(i,j,1) - enddo - endif - do k=2,nz - do concurrent (j=js:je, i=is:ie) - eta_h(i,j) = eta_h(i,j) + h(i,j,k) + do concurrent (j=js:je) + if (GV%Boussinesq) then + do concurrent (i=is:ie) + eta_h(i,j) = h(i,j,1) - G%bathyT(i,j)*GV%Z_to_H + enddo + else + do concurrent (i=is:ie) + eta_h(i,j) = h(i,j,1) + enddo + endif + do k=2,nz + do concurrent (i=is:ie) + eta_h(i,j) = eta_h(i,j) + h(i,j,k) + enddo enddo + if (set_cor) then + do concurrent (i=is:ie) + d_eta = eta_h(i,j) - eta(i,j) + CS%eta_cor(i,j) = d_eta + enddo + else + do concurrent (i=is:ie) + d_eta = eta_h(i,j) - eta(i,j) + CS%eta_cor(i,j) = CS%eta_cor(i,j) + d_eta + enddo + endif enddo - if (set_cor) then - do concurrent (j=js:je, i=is:ie) - d_eta = eta_h(i,j) - eta(i,j) - CS%eta_cor(i,j) = d_eta - enddo - else - do concurrent (j=js:je, i=is:ie) - d_eta = eta_h(i,j) - eta(i,j) - CS%eta_cor(i,j) = CS%eta_cor(i,j) + d_eta - enddo - endif !$omp end target data diff --git a/src/core/MOM_dynamics_split_RK2.F90 b/src/core/MOM_dynamics_split_RK2.F90 index 7b82b9af95..00e8e9ccbf 100644 --- a/src/core/MOM_dynamics_split_RK2.F90 +++ b/src/core/MOM_dynamics_split_RK2.F90 @@ -1186,7 +1186,6 @@ subroutine step_MOM_dyn_split_RK2(u_inst, v_inst, h, tv, visc, Time_local, dt, f ! CAu = -(f+zeta_av)/h_av vh + d/dx KE_av call CorAdCalc(u_av, v_av, h_av, uh, vh, CS%CAu_pred, CS%CAv_pred, CS%OBC, CS%AD_pred, & G, GV, US, CS%CoriolisAdv, pbv, Waves=Waves) - !$omp target update from(CS%CAu_pred, CS%CAv_pred) CS%CAu_pred_stored = .true. call enable_averages(dt, Time_local, CS%diag) ! Reenable the averaging call cpu_clock_end(id_clock_Cor) diff --git a/src/core/MOM_forcing_type.F90 b/src/core/MOM_forcing_type.F90 index f51ec928b6..fe92d64ad4 100644 --- a/src/core/MOM_forcing_type.F90 +++ b/src/core/MOM_forcing_type.F90 @@ -1228,6 +1228,8 @@ subroutine find_ustar_fluxes(fluxes, tv, U_star, G, GV, US, halo, H_T_units) enddo ; enddo endif + !$omp target update to(U_star) + end subroutine find_ustar_fluxes @@ -1263,13 +1265,13 @@ subroutine find_ustar_mech_forcing(forces, tv, U_star, G, GV, US, halo, H_T_unit if (associated(forces%ustar) .and. (GV%Boussinesq .or. .not.associated(forces%tau_mag))) then if (Z_T_units) then - do j=js,je ; do i=is,ie + do concurrent (j=js:je, i=is:ie) !do j=js,je ; do i=is,ie U_star(i,j) = forces%ustar(i,j) - enddo ; enddo + enddo else - do j=js,je ; do i=is,ie + do concurrent (j=js:je, i=is:ie) !do j=js,je ; do i=is,ie U_star(i,j) = GV%Z_to_H * forces%ustar(i,j) - enddo ; enddo + enddo endif elseif (allocated(tv%SpV_avg)) then if (tv%valid_SpV_halo < 0) call MOM_error(FATAL, & @@ -1277,20 +1279,20 @@ subroutine find_ustar_mech_forcing(forces, tv, U_star, G, GV, US, halo, H_T_unit if (tv%valid_SpV_halo < hs) call MOM_error(FATAL, & "find_ustar_mech called in non-Boussinesq mode with insufficient valid values of SpV_avg.") if (Z_T_units) then - do j=js,je ; do i=is,ie + do concurrent (j=js:je, i=is:ie) U_star(i,j) = sqrt(forces%tau_mag(i,j) * tv%SpV_avg(i,j,1)) - enddo ; enddo + enddo else - do j=js,je ; do i=is,ie + do concurrent (j=js:je, i=is:ie) U_star(i,j) = GV%RZ_to_H * sqrt(forces%tau_mag(i,j) / tv%SpV_avg(i,j,1)) - enddo ; enddo + enddo endif else I_rho = GV%Z_to_H * GV%RZ_to_H if (Z_T_units) I_rho = GV%H_to_Z * GV%RZ_to_H ! == 1.0 / GV%Rho0 - do j=js,je ; do i=is,ie + do concurrent (j=js:je, i=is:ie) U_star(i,j) = sqrt(forces%tau_mag(i,j) * I_rho) - enddo ; enddo + enddo endif end subroutine find_ustar_mech_forcing diff --git a/src/parameterizations/vertical/MOM_set_viscosity.F90 b/src/parameterizations/vertical/MOM_set_viscosity.F90 index 5cddbf86fe..12191fd62d 100644 --- a/src/parameterizations/vertical/MOM_set_viscosity.F90 +++ b/src/parameterizations/vertical/MOM_set_viscosity.F90 @@ -2521,7 +2521,6 @@ subroutine set_viscous_ML(u, v, h, tv, forces, visc, dt, G, GV, US, CS) visc%nkml_visc_v(i,J) = k_massive(i) endif ; enddo ; endif - !$omp target update to(visc%nkml_visc_u, visc%nkml_visc_v) endif ! dynamic_viscous_ML do_any_shelf = .false. @@ -2690,6 +2689,8 @@ subroutine set_viscous_ML(u, v, h, tv, forces, visc, dt, G, GV, US, CS) enddo ! J-loop at v-points + !$omp target update to(visc%nkml_visc_u, visc%nkml_visc_v) if (CS%dynamic_viscous_ML) + if (CS%debug) then if (allocated(visc%nkml_visc_u) .and. allocated(visc%nkml_visc_v)) & call uvchksum("nkml_visc_[uv]", visc%nkml_visc_u, visc%nkml_visc_v, & diff --git a/src/parameterizations/vertical/MOM_vert_friction.F90 b/src/parameterizations/vertical/MOM_vert_friction.F90 index 210a6b2b13..26c5784a48 100644 --- a/src/parameterizations/vertical/MOM_vert_friction.F90 +++ b/src/parameterizations/vertical/MOM_vert_friction.F90 @@ -1,3 +1,5 @@ +#include "do_concurrent_compat.h" + !> Implements vertical viscosity (vertvisc) module MOM_vert_friction @@ -781,35 +783,39 @@ subroutine vertvisc(u, v, h, forces, visc, dt, OBC, ADp, CDp, G, GV, US, CS, & enddo endif - do k=2,nz - if (allocated(visc%Ray_u)) then - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq) - Ray(I,j) = visc%Ray_u(I,j,k) + do concurrent (j=G%jsc:G%jec) + do k=2,nz + if (allocated(visc%Ray_u)) then + do concurrent (I=Isq:Ieq) + Ray(I,j) = visc%Ray_u(I,j,k) + enddo + endif + + do concurrent (I=Isq:Ieq, G%mask2dCu(I,j) > 0.) + c1(I,j,k) = dt * CS%a_u(I,j,K) * b1(I,j) + b_denom_1 = CS%h_u(I,j,k) + dt * (Ray(I,j) + CS%a_u(I,j,K)*d1(I,j)) + b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,K+1)) + d1(I,j) = b_denom_1 * b1(I,j) + u(I,j,k) = (CS%h_u(I,j,k) * u(I,j,k) + & + dt * CS%a_u(I,j,K) * u(I,j,k-1)) * b1(I,j) enddo - endif - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(I,j) > 0.) - c1(I,j,k) = dt * CS%a_u(I,j,K) * b1(I,j) - b_denom_1 = CS%h_u(I,j,k) + dt * (Ray(I,j) + CS%a_u(I,j,K)*d1(I,j)) - b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,K+1)) - d1(I,j) = b_denom_1 * b1(I,j) - u(I,j,k) = (CS%h_u(I,j,k) * u(I,j,k) + & - dt * CS%a_u(I,j,K) * u(I,j,k-1)) * b1(I,j) + if (associated(ADp%du_dt_str)) then + do concurrent (I=Isq:Ieq, G%mask2dCu(I,j) > 0.) + ADp%du_dt_str(I,j,k) = (CS%h_u(I,j,k) * ADp%du_dt_str(I,j,k) & + + dt * CS%a_u(I,j,K) * ADp%du_dt_str(I,j,k-1)) * b1(I,j) + enddo + endif enddo - - if (associated(ADp%du_dt_str)) then - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(I,j) > 0.) - ADp%du_dt_str(I,j,k) = (CS%h_u(I,j,k) * ADp%du_dt_str(I,j,k) & - + dt * CS%a_u(I,j,K) * ADp%du_dt_str(I,j,k-1)) * b1(I,j) - enddo - endif enddo ! back substitute to solve for the new velocities ! u_k = d'_k - c'_k x_(k+1) - do k=nz-1,1,-1 - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(I,j) > 0.0) - u(I,j,k) = u(I,j,k) + c1(I,j,k+1) * u(I,j,k+1) + do concurrent (j=G%jsc:G%jec) + do k=nz-1,1,-1 + do concurrent (I=Isq:Ieq, G%mask2dCu(I,j) > 0.0) + u(I,j,k) = u(I,j,k) + c1(I,j,k+1) * u(I,j,k+1) + enddo enddo enddo @@ -1021,32 +1027,34 @@ subroutine vertvisc(u, v, h, forces, visc, dt, OBC, ADp, CDp, G, GV, US, CS, & enddo endif - do k=2,nz - if (allocated(visc%Ray_v)) then - do concurrent (j=jsq:jeq, i=is:ie) - Ray(i,J) = visc%Ray_v(i,J,k) + do concurrent (j=Jsq:Jeq) + do k=2,nz + if (allocated(visc%Ray_v)) then + do concurrent (i=is:ie) + Ray(i,J) = visc%Ray_v(i,J,k) + enddo + endif + + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.0) + c1(i,J,k) = dt * CS%a_v(i,J,K) * b1(i,J) + b_denom_1 = CS%h_v(i,J,k) + dt * (Ray(i,J) + CS%a_v(i,J,K)*d1(i,J)) + b1(i,J) = 1.0 / (b_denom_1 + dt * CS%a_v(i,J,K+1)) + d1(i,J) = b_denom_1 * b1(i,J) + v(i,J,k) = (CS%h_v(i,J,k) * v(i,J,k) + dt * CS%a_v(i,J,K) * v(i,J,k-1)) * b1(i,J) enddo - endif - do concurrent (j=jsq:jeq, i=is:ie, G%mask2dCv(i,j) > 0.0) - c1(i,J,k) = dt * CS%a_v(i,J,K) * b1(i,J) - b_denom_1 = CS%h_v(i,J,k) + dt * (Ray(i,J) + CS%a_v(i,J,K)*d1(i,J)) - b1(i,J) = 1.0 / (b_denom_1 + dt * CS%a_v(i,J,K+1)) - d1(i,J) = b_denom_1 * b1(i,J) - v(i,J,k) = (CS%h_v(i,J,k) * v(i,J,k) + dt * CS%a_v(i,J,K) * v(i,J,k-1)) * b1(i,J) + if (associated(ADp%dv_dt_str)) then + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.0) + ADp%dv_dt_str(i,J,k) = (CS%h_v(i,J,k) * ADp%dv_dt_str(i,J,k) & + + dt * CS%a_v(i,J,K) * ADp%dv_dt_str(i,J,k-1)) * b1(i,J) + enddo + endif enddo - if (associated(ADp%dv_dt_str)) then - do concurrent (J=Jsq:Jeq, i=is:ie, G%mask2dCv(i,j) > 0.0) - ADp%dv_dt_str(i,J,k) = (CS%h_v(i,J,k) * ADp%dv_dt_str(i,J,k) & - + dt * CS%a_v(i,J,K) * ADp%dv_dt_str(i,J,k-1)) * b1(i,J) + do k=nz-1,1,-1 + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.0) + v(i,J,k) = v(i,J,k) + c1(i,J,k+1) * v(i,J,k+1) enddo - endif - enddo - - do k=nz-1,1,-1 - do concurrent (j=jsq:jeq, i=is:ie, G%mask2dCv(i,j) > 0.0) - v(i,J,k) = v(i,J,k) + c1(i,J,k+1) * v(i,J,k+1) enddo enddo @@ -1306,85 +1314,89 @@ subroutine vertvisc_remnant(visc, visc_rem_u, visc_rem_v, dt, G, GV, US, CS) !$omp target enter data map(alloc: b1, c1, d1, Ray, b_denom_1) - if (allocated(visc%Ray_u)) then - do j=G%jsc,G%jec ; do I=Isq,Ieq - Ray(I,j) = visc%Ray_u(I,j,1) - enddo ; enddo - else - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq) - Ray(I,j) = 0. - enddo - endif - - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(i,j) > 0.) - b_denom_1 = CS%h_u(I,j,1) + dt * (Ray(I,j) + CS%a_u(I,j,1)) - b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,2)) - d1(I,j) = b_denom_1 * b1(I,j) - visc_rem_u(I,j,1) = b1(I,j) * CS%h_u(I,j,1) - enddo - - - do k=2,nz + do concurrent (j=G%jsc:G%jec) if (allocated(visc%Ray_u)) then - do j=G%jsc,G%jec ; do I=Isq,Ieq - Ray(I,j) = visc%Ray_u(I,j,k) - enddo ; enddo + do concurrent (I=Isq:Ieq) + Ray(I,j) = visc%Ray_u(I,j,1) + enddo + else + do concurrent (I=Isq:Ieq) + Ray(I,j) = 0. + enddo endif - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(I,j) > 0.) - c1(I,j,k) = dt * CS%a_u(I,j,K)*b1(I,j) - b_denom_1 = CS%h_u(I,j,k) + dt * (Ray(I,j) + CS%a_u(I,j,K) * d1(I,j)) - b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,K+1)) + do concurrent (I=Isq:Ieq, G%mask2dCu(i,j) > 0.) + b_denom_1 = CS%h_u(I,j,1) + dt * (Ray(I,j) + CS%a_u(I,j,1)) + b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,2)) d1(I,j) = b_denom_1 * b1(I,j) - visc_rem_u(I,j,k) = (CS%h_u(I,j,k) + dt * CS%a_u(I,j,K) * visc_rem_u(I,j,k-1)) * b1(I,j) + visc_rem_u(I,j,1) = b1(I,j) * CS%h_u(I,j,1) enddo - enddo - do k=nz-1,1,-1 - do concurrent (j=G%jsc:G%jec, I=Isq:Ieq, G%mask2dCu(I,j) > 0.) - visc_rem_u(I,j,k) = visc_rem_u(I,j,k) + c1(I,j,k+1) * visc_rem_u(I,j,k+1) - enddo - enddo + do k=2,nz + if (allocated(visc%Ray_u)) then + do concurrent (I=Isq:Ieq) + Ray(I,j) = visc%Ray_u(I,j,k) + enddo + endif - ! Now find the meridional viscous remnant using the robust tridiagonal solver. - if (allocated(visc%Ray_v)) then - do J=Jsq,Jeq ; do i=is,ie - Ray(i,J) = visc%Ray_v(i,J,1) - enddo ; enddo - else - do concurrent (j=jsq:jeq, i=is:ie) - Ray(i,J) = 0. + do concurrent (I=Isq:Ieq, G%mask2dCu(I,j) > 0.) + c1(I,j,k) = dt * CS%a_u(I,j,K)*b1(I,j) + b_denom_1 = CS%h_u(I,j,k) + dt * (Ray(I,j) + CS%a_u(I,j,K) * d1(I,j)) + b1(I,j) = 1.0 / (b_denom_1 + dt * CS%a_u(I,j,K+1)) + d1(I,j) = b_denom_1 * b1(I,j) + visc_rem_u(I,j,k) = (CS%h_u(I,j,k) + dt * CS%a_u(I,j,K) * visc_rem_u(I,j,k-1)) * b1(I,j) + enddo enddo - endif - do concurrent (J=Jsq:Jeq, i=is:ie, G%mask2dCv(i,j) > 0.) - b_denom_1 = CS%h_v(i,J,1) + dt * (Ray(i,J) + CS%a_v(i,J,1)) - b1(i,J) = 1.0 / (b_denom_1 + dt*CS%a_v(i,J,2)) - d1(i,J) = b_denom_1 * b1(i,J) - visc_rem_v(i,J,1) = b1(i,J) * CS%h_v(i,J,1) + do k=nz-1,1,-1 + do concurrent (I=Isq:Ieq, G%mask2dCu(I,j) > 0.) + visc_rem_u(I,j,k) = visc_rem_u(I,j,k) + c1(I,j,k+1) * visc_rem_u(I,j,k+1) + enddo + enddo enddo - do k=2,nz + ! Now find the meridional viscous remnant using the robust tridiagonal solver. + do concurrent (J=Jsq:Jeq) if (allocated(visc%Ray_v)) then - do J=Jsq,Jeq ; do i=is,ie - Ray(i,J) = visc%Ray_v(i,J,k) - enddo ; enddo + do concurrent (i=is:ie) + Ray(i,J) = visc%Ray_v(i,J,1) + enddo + else + do concurrent (i=is:ie) + Ray(i,J) = 0. + enddo endif - do concurrent (J=Jsq:Jeq, i=is:ie, G%mask2dCv(i,j) > 0.) - c1(i,J,k) = dt * CS%a_v(i,J,K) * b1(i,J) - b_denom_1 = CS%h_v(i,J,k) + dt * (Ray(i,J) + CS%a_v(i,J,K) * d1(i,J)) - b1(i,J) = 1.0 / (b_denom_1 + dt * CS%a_v(i,J,K+1)) + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.) + b_denom_1 = CS%h_v(i,J,1) + dt * (Ray(i,J) + CS%a_v(i,J,1)) + b1(i,J) = 1.0 / (b_denom_1 + dt*CS%a_v(i,J,2)) d1(i,J) = b_denom_1 * b1(i,J) - visc_rem_v(i,J,k) = (CS%h_v(i,J,k) + dt * CS%a_v(i,J,K) * visc_rem_v(i,J,k-1)) * b1(i,J) + visc_rem_v(i,J,1) = b1(i,J) * CS%h_v(i,J,1) + enddo + + do k=2,nz + if (allocated(visc%Ray_v)) then + do concurrent (i=is:ie) + Ray(i,J) = visc%Ray_v(i,J,k) + enddo + endif + + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.) + c1(i,J,k) = dt * CS%a_v(i,J,K) * b1(i,J) + b_denom_1 = CS%h_v(i,J,k) + dt * (Ray(i,J) + CS%a_v(i,J,K) * d1(i,J)) + b1(i,J) = 1.0 / (b_denom_1 + dt * CS%a_v(i,J,K+1)) + d1(i,J) = b_denom_1 * b1(i,J) + visc_rem_v(i,J,k) = (CS%h_v(i,J,k) + dt * CS%a_v(i,J,K) * visc_rem_v(i,J,k-1)) * b1(i,J) + enddo enddo - enddo - do k=nz-1,1,-1 - do concurrent (J=Jsq:Jeq, i=is:ie, G%mask2dCv(i,j) > 0.) - visc_rem_v(i,J,k) = visc_rem_v(i,J,k) + c1(i,J,k+1) * visc_rem_v(i,J,k+1) + do k=nz-1,1,-1 + do concurrent (i=is:ie, G%mask2dCv(i,j) > 0.) + visc_rem_v(i,J,k) = visc_rem_v(i,J,k) + c1(i,J,k+1) * visc_rem_v(i,J,k+1) + enddo enddo enddo + !$omp target exit data map(delete: b1, c1, d1, Ray, b_denom_1) if (CS%debug) then @@ -1511,10 +1523,16 @@ subroutine vertvisc_coef(u, v, h, dz, forces, visc, tv, dt, G, GV, US, CS, OBC, if (.not.CS%initialized) call MOM_error(FATAL,"MOM_vert_friction(coef): "// & "Module must be initialized before it is used.") + !$omp target enter data map(alloc: I_Hbbl) + h_neglect = GV%H_subroundoff dz_neglect = GV%dZ_subroundoff a_cpl_max = 1.0e37 * GV%m_to_H * US%T_to_s - I_Hbbl(:,:) = 1. / (CS%Hbbl + dz_neglect) + do concurrent (j=SZJB_(G), i=SZIB_(G)) + I_Hbbl(i,j) = 1. / (CS%Hbbl + dz_neglect) + enddo + + ! below allocations and initializations will probably be slow on CPU if (CS%use_GL90_in_SSW) then I_Hbbl_gl90(:,:) = 1. / (CS%Hbbl_gl90 + dz_neglect) endif @@ -1552,8 +1570,8 @@ subroutine vertvisc_coef(u, v, h, dz, forces, visc, tv, dt, G, GV, US, CS, OBC, js_W_OBC = max(js, OBC%Js_u_W_obc) ; je_W_OBC = min(je, OBC%je_u_W_obc) endif + !$omp target enter data map(alloc: Ustar_2d) call find_ustar(forces, tv, Ustar_2d, G, GV, US, halo=1) - !$omp target enter data map(to: Ustar_2d) ! First do u-points @@ -1917,15 +1935,17 @@ subroutine vertvisc_coef(u, v, h, dz, forces, visc, tv, dt, G, GV, US, CS, OBC, enddo endif - do K=1,nz+1 - do concurrent (j=js:je, i=isq:ieq, do_i(i,j)) - CS%a_u(I,j,K) = min(a_cpl_max, a_cpl(I,j,K)) + do concurrent (j=js:je) + do K=1,nz+1 + do concurrent (i=isq:ieq, do_i(i,j)) + CS%a_u(I,j,K) = min(a_cpl_max, a_cpl(I,j,K)) + enddo enddo - enddo - do k=1,nz - do concurrent (j=js:je, i=isq:ieq, do_i(i,j)) - CS%h_u(I,j,k) = hvel(I,j,k) + h_neglect + do k=1,nz + do concurrent (i=isq:ieq, do_i(i,j)) + CS%h_u(I,j,k) = hvel(I,j,k) + h_neglect + enddo enddo enddo endif @@ -2302,15 +2322,17 @@ subroutine vertvisc_coef(u, v, h, dz, forces, visc, tv, dt, G, GV, US, CS, OBC, enddo endif - do K=1,nz+1 - do concurrent (J=Jsq:Jeq, i=is:ie, do_i(i,j)) - CS%a_v(i,J,K) = min(a_cpl_max, a_cpl(i,J,K)) + do concurrent (J=Jsq:Jeq) + do K=1,nz+1 + do concurrent (i=is:ie, do_i(i,j)) + CS%a_v(i,J,K) = min(a_cpl_max, a_cpl(i,J,K)) + enddo enddo - enddo - do k=1,nz - do concurrent (J=Jsq:Jeq, i=is:ie, do_i(i,j)) - CS%h_v(i,J,k) = hvel(i,J,k) + h_neglect + do k=1,nz + do concurrent (i=is:ie, do_i(i,j)) + CS%h_v(i,J,k) = hvel(i,J,k) + h_neglect + enddo enddo enddo endif @@ -2460,7 +2482,6 @@ subroutine find_coupling_coef(a_cpl, hvel, do_i, h_harm, bbl_thick, kv_bbl, z_i, integer :: nz, max_nk integer :: is_N_OBC, is_S_OBC, Is_E_OBC, Is_W_OBC, ie_N_OBC, ie_S_OBC, Ie_E_OBC, Ie_W_OBC integer :: js_N_OBC, js_S_OBC, Js_E_OBC, Js_W_OBC, je_N_OBC, je_S_OBC, Je_E_OBC, Je_W_OBC - if (work_on_u) then Is = G%IscB ; Ie = G%IecB js = G%jsc ; je = G%jec @@ -2865,19 +2886,21 @@ subroutine find_coupling_coef(a_cpl, hvel, do_i, h_harm, bbl_thick, kv_bbl, z_i, max_nk = 0 if (work_on_u) then - do concurrent (j=js:je, i=is:ie, do_i(i,j)) + do concurrent (j=js:je, i=is:ie, do_i(i,j)) DO_LOCALITY(reduce(max:max_nk)) nk_in_ml(I,j) = ceiling(visc%nkml_visc_u(I,j)) max_nk = max(max_nk, nk_in_ml(I,j)) enddo ! TODO: max_nk is transferred back, try to keep it on the device - do k=1,max_nk - do concurrent (j=js:je, i=is:ie, do_i(i,j)) - if (k <= visc%nkml_visc_u(I,j)) then ! This layer is all in the ML. - h_ml(i,j) = h_ml(i,j) + hvel(i,j,k) - elseif (k < visc%nkml_visc_u(I,j) + 1.) then ! Part of this layer is in the ML. - h_ml(i,j) = h_ml(i,j) + ((visc%nkml_visc_u(I,j) + 1.) - k) * hvel(i,j,k) - endif + do concurrent (j=js:je) + do k=1,max_nk + do concurrent (i=is:ie, do_i(i,j)) + if (k <= visc%nkml_visc_u(I,j)) then ! This layer is all in the ML. + h_ml(i,j) = h_ml(i,j) + hvel(i,j,k) + elseif (k < visc%nkml_visc_u(I,j) + 1.) then ! Part of this layer is in the ML. + h_ml(i,j) = h_ml(i,j) + ((visc%nkml_visc_u(I,j) + 1.) - k) * hvel(i,j,k) + endif + enddo enddo enddo else @@ -2999,26 +3022,28 @@ subroutine find_coupling_coef(a_cpl, hvel, do_i, h_harm, bbl_thick, kv_bbl, z_i, endif ; enddo ; enddo enddo else - do K=2,max_nk - do concurrent (j=js:je, i=is:ie, k <= nk_in_ml(i,j)) - z_t(i,j) = z_t(i,j) + hvel(i,j,k-1) - - temp1 = (z_t(i,j) * h_ml(i,j) - z_t(i,j) * z_t(i,j)) - ! This viscosity is set to go to 0 at the mixed layer top and bottom (in a log-layer) - ! and be further limited by rotation to give the natural Ekman length. - ! The following expressions are mathematically equivalent. - if (GV%Boussinesq .or. (CS%answer_date < 20230601)) then - visc_ml = u_star(i,j) * CS%vonKar * (GV%Z_to_H * temp1 * u_star(i,j)) & - / (absf(i,j) * temp1 + (h_ml(i,j)+h_neglect) * u_star(i,j)) - else - visc_ml = CS%vonKar * (temp1 * tau_mag(i,j)) & - / (absf(i,j) * temp1 + (h_ml(i,j) + h_neglect) * u_star(i,j)) - endif - a_ml = visc_ml / (0.25 * (hvel(i,j,k) + hvel(i,j,k-1) + h_neglect) + 0.5 * I_amax * visc_ml) + do concurrent (j=js:je) + do K=2,max_nk + do concurrent (i=is:ie, k <= nk_in_ml(i,j)) + z_t(i,j) = z_t(i,j) + hvel(i,j,k-1) + + temp1 = (z_t(i,j) * h_ml(i,j) - z_t(i,j) * z_t(i,j)) + ! This viscosity is set to go to 0 at the mixed layer top and bottom (in a log-layer) + ! and be further limited by rotation to give the natural Ekman length. + ! The following expressions are mathematically equivalent. + if (GV%Boussinesq .or. (CS%answer_date < 20230601)) then + visc_ml = u_star(i,j) * CS%vonKar * (GV%Z_to_H * temp1 * u_star(i,j)) & + / (absf(i,j) * temp1 + (h_ml(i,j)+h_neglect) * u_star(i,j)) + else + visc_ml = CS%vonKar * (temp1 * tau_mag(i,j)) & + / (absf(i,j) * temp1 + (h_ml(i,j) + h_neglect) * u_star(i,j)) + endif + a_ml = visc_ml / (0.25 * (hvel(i,j,k) + hvel(i,j,k-1) + h_neglect) + 0.5 * I_amax * visc_ml) - ! Choose the largest estimate of a_cpl, but these could be changed to be additive. - a_cpl(i,j,K) = max(a_cpl(i,j,K), a_ml) - ! An option could be added to change this to: a_cpl(i,K) = a_cpl(i,K) + a_ml + ! Choose the largest estimate of a_cpl, but these could be changed to be additive. + a_cpl(i,j,K) = max(a_cpl(i,j,K), a_ml) + ! An option could be added to change this to: a_cpl(i,K) = a_cpl(i,K) + a_ml + enddo enddo enddo endif @@ -3261,9 +3286,9 @@ subroutine vertvisc_limit_vel(u, v, h, ADp, CDp, forces, visc, dt, G, GV, US, CS endif endif !$omp target exit data map(release: dowrite, vel_report, trunc_any_array) - !$omp target exit data map(from:u_old, v_old) if (len_trim(CS%u_trunc_file) > 0) then + !$omp target update from(u_old) !I need to port this do j=js,je ; do I=Isq,Ieq ; if (dowrite(I,j)) then ! Call a diagnostic reporting subroutines are called if unphysically large values are found. @@ -3273,6 +3298,7 @@ subroutine vertvisc_limit_vel(u, v, h, ADp, CDp, forces, visc, dt, G, GV, US, CS endif if (len_trim(CS%v_trunc_file) > 0) then + !$omp target update from(v_old) do J=Jsq,Jeq ; do i=is,ie ; if (dowrite(i,J)) then ! Call a diagnostic reporting subroutines are called if unphysically large values are found. call write_v_accel(i, J, v_old, h, ADp, CDp, dt, G, GV, US, CS%PointAccel_CSp, &