Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions modulefiles/gsi_wcoss2.intel.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ local cray_mpich_ver=os.getenv("cray_mpich_ver") or "8.1.9"
local cmake_ver= os.getenv("cmake_ver") or "3.20.2"
local prod_util_ver=os.getenv("prod_util_ver") or "2.0.10"

local zlib_ver=os.getenv("zlib_ver") or "1.2.11"
local bufr_ver=os.getenv("bufr_ver") or "11.7.0"
local hdf5_ver=os.getenv("hdf5_ver") or "1.14.0"
local pnetcdf_ver=os.getenv("pnetcdf_ver") or "1.12.2"
local netcdf_ver=os.getenv("netcdf_ver") or "4.9.2"
Expand All @@ -33,6 +35,7 @@ load(pathJoin("cmake", cmake_ver))

load(pathJoin("prod_util", prod_util_ver))

load(pathJoin("zlib", zlib_ver))
load(pathJoin("hdf5-D", hdf5_ver))
load(pathJoin("pnetcdf-D", pnetcdf_ver))
load(pathJoin("netcdf-D", netcdf_ver))
Expand Down
26 changes: 15 additions & 11 deletions src/enkf/enkf.f90
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ subroutine enkf_update()
integer(i_kind) nanal,nn,nnn,nobm,nsame,nn1,nn2,oz_ind,nlev,dbz_ind
real(r_single),dimension(nlevs_pres):: taperv
logical lastiter, kdgrid, kdobs
real(r_single) :: meanval

! allocate temporary arrays.
allocate(anal_obchunk(nanals,nobs_max))
Expand Down Expand Up @@ -764,22 +765,25 @@ subroutine enkf_update()

! make sure posterior perturbations still have zero mean.
! (roundoff errors can accumulate)
meanval=zero !use firstprivate to avoid "false sharing'
if (lastiter .and. .not. lupd_obspace_serial) then
!$omp parallel do schedule(dynamic) private(npt,nb,i)
do npt=1,npts_max
do nb=1,nbackgrounds
do i=1,ncdim
anal_chunk(1:nanals,npt,i,nb) = anal_chunk(1:nanals,npt,i,nb)-&
sum(anal_chunk(1:nanals,npt,i,nb),1)*r_nanals
end do
end do
enddo
!$omp parallel do schedule(dynamic) private(npt, nb, i) firstprivate(meanval)
do npt = 1, npts_max
do nb = 1, nbackgrounds
do i = 1, ncdim
meanval = sum(anal_chunk(1:nanals, npt, i, nb), 1) * r_nanals
anal_chunk(1:nanals, npt, i, nb) = anal_chunk(1:nanals, npt, i, nb) - meanval
end do
end do
end do
!$omp end parallel do
endif
!$omp parallel do schedule(dynamic) private(nob)
meanval=zero !use firstprivate to avoid "false sharing"
!$omp parallel do schedule(dynamic) private(nob) firstprivate(meanval)
do nob=1,nobs_max
meanval=sum(anal_obchunk(1:nanals,nob),1)
anal_obchunk(1:nanals,nob) = anal_obchunk(1:nanals,nob)-&
sum(anal_obchunk(1:nanals,nob),1)*r_nanals
meanval*r_nanals
enddo
!$omp end parallel do

Expand Down
5 changes: 5 additions & 0 deletions src/enkf/loadbal.f90
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ subroutine load_balance()
! assume work load proportional to number of 'nearby' obs
call estimate_work_enkf1(numobs) ! fill numobs array with number of obs per horiz point
! distribute the results of estimate_work to all processors.
!clt call mpi_barrier(mpi_comm_world,ierr) !added, for debug mode, on wcoss2 ,otherwise,
!clt !" MPICH suspects a hang due to rendezvous message resource exhaustion."
!clt commented out now for the noticeable degraded performance
call mpi_allreduce(mpi_in_place,numobs,npts,mpi_integer,mpi_sum,mpi_comm_world,ierr)
if (letkf_flag .and. nobsl_max > 0) then
where(numobs > nobsl_max) numobs = nobsl_max
Expand Down Expand Up @@ -298,6 +301,7 @@ subroutine load_balance()
end if
! for serial enkf, create observation priors to be updated on each processor.
allocate(anal_obchunk_prior(nanals,nobs_max))
anal_obchunk_prior=zero
do nob1=1,numobsperproc(nproc+1)
nob2 = indxproc_obs(nproc+1,nob1)
anal_obchunk_prior(1:nanals,nob1) = anal_ob(1:nanals,nob2)
Expand Down Expand Up @@ -357,6 +361,7 @@ subroutine scatter_chunks
allocate(rcounts(0:numproc-1))
! allocate array to hold pieces of state vector on each proc.
allocate(anal_chunk(nanals,npts_max,ncdim,nbackgrounds))
anal_chunk=zero
if (nproc == 0) print *,'anal_chunk size = ',size(anal_chunk,kind=8)

! only IO tasks send any data.
Expand Down
2 changes: 2 additions & 0 deletions src/gsi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ find_package(NetCDF REQUIRED Fortran)
if(OPENMP)
find_package(OpenMP REQUIRED)
endif()
find_package(ZLIB REQUIRED)

# NCEPLibs dependencies
find_package(bacio REQUIRED)
Expand Down Expand Up @@ -156,6 +157,7 @@ target_link_libraries(gsi_fortran_obj PUBLIC w3emc::w3emc_d)
target_link_libraries(gsi_fortran_obj PUBLIC ip::ip_d)
target_link_libraries(gsi_fortran_obj PUBLIC bufr::bufr_4)
target_link_libraries(gsi_fortran_obj PUBLIC crtm::crtm)
target_link_libraries(gsi_fortran_obj PUBLIC ZLIB::ZLIB)
if(GSI_MODE MATCHES "Regional")
target_link_libraries(gsi_fortran_obj PUBLIC wrf_io::wrf_io)
endif()
Expand Down
4 changes: 2 additions & 2 deletions src/gsi/anisofilter.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5415,16 +5415,16 @@ subroutine get2berr_reg_subdomain_option(mype)
!
! CONVERT bckgvar4 FROM FILTER GRID TO ANALYSIS GRID BEFORE WRITING OUT
!
if(mype==0) then
bckgvar8f=bckgvar4f
call fgrid2agrid(pf2aP1,bckgvar8f,bckgvar8a)
bckgvar4=bckgvar8a
if(mype==0) then
ivar=jdvar(k)
chvarname=fvarname(ivar)
open (94,file='bckgvar.dat_'//trim(chvarname),form='unformatted')
write(94) bckgvar4
close(94)
end if
end if
enddo
deallocate(bckgvar4t,bckgvar4f,bckgvar4,bckgvar8f,bckgvar8a,bckgvar0f)

Expand Down
2 changes: 2 additions & 0 deletions src/gsi/combine_radobs.f90
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ subroutine combine_radobs(mype_sub,mype_root,&
end if
deallocate(icrit)
allocate(data_all_in(nele,ndata))
data_all_in=zero
!$omp parallel do private(kk,k,l)
do kk=1,ndata
k=nloc(kk)
Expand All @@ -131,6 +132,7 @@ subroutine combine_radobs(mype_sub,mype_root,&

end do
deallocate(nloc)


! get all data on process mype_root
! data_all(:,:) = zero
Expand Down
47 changes: 43 additions & 4 deletions src/gsi/cplr_get_fv3_regional_ensperts.f90
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
use netcdf_mod , only: nc_check
use gsi_rfv3io_mod, only: fv3lam_io_phymetvars3d_nouv
use obsmod, only: if_model_dbz,if_model_fed
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS, MPI_MAX


implicit none
Expand Down Expand Up @@ -119,6 +120,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
integer(i_kind):: imem_start,n_fv3sar

integer(i_kind):: i_caseflag
real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
integer(i_kind) :: ierr

if(n_ens/=(n_ens_gfs+n_ens_fv3sar)) then
write(6,*)'wrong, the sum of n_ens_gfs and n_ens_fv3sar not equal n_ens, stop'
Expand Down Expand Up @@ -275,7 +278,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
end if
end if


tb=MPI_Wtime()
do m=1,ntlevs_ens


Expand Down Expand Up @@ -452,7 +455,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
if( .not. parallelization_over_ensmembers )then
if (mype == 0) write(6,'(a,a)') &
'CALL READ_FV3_REGIONAL_ENSPERTS FOR ENS DATA with the filename str : ',trim(ensfilenam_str)


time_beg=MPI_Wtime()
select case (i_caseflag)
case (0)
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz)
Expand All @@ -469,9 +473,15 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_fed=fed)
case (5)
!write(6,'("get_fv3_regional_ensperts_run: Before general_read_fv3_regional")')
call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_dbz=dbz,g_fed=fed)
end select
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime for general_read_fv3_regional" f15.4,I4)') walltime,i_caseflag

end if

if( parallelization_over_ensmembers )then
Expand Down Expand Up @@ -792,6 +802,11 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
end do

enddo ! it 4d loop
te=MPI_Wtime()
call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_fv3sar,wt

! CALCULATE ENSEMBLE SPREAD
if(write_ens_sprd ) then
call this%ens_spread_dualres_regional(mype,en_perts,nelen)
Expand Down Expand Up @@ -851,7 +866,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
use hybrid_ensemble_parameters, only: grd_ens,q_hyb_ens
use hybrid_ensemble_parameters, only: fv3sar_ensemble_opt,dual_res

use mpimod, only: mpi_comm_world,mpi_rtype
use mpimod, only: mpi_comm_world,mpi_rtype,mype
use gsi_rfv3io_mod,only: type_fv3regfilenameg
use gsi_rfv3io_mod,only:n2d
use constants, only: half,zero
Expand All @@ -870,6 +885,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
use directDA_radaruse_mod, only: l_use_cvpqx, cvpqx_pval, cld_nt_updt
use directDA_radaruse_mod, only: l_cvpnr, cvpnr_pval
use obsmod, only:if_model_dbz,if_model_fed
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_MAX, MPI_SUCCESS


implicit none
Expand Down Expand Up @@ -913,6 +929,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
character(len=:),allocatable :: sfcdata !='fv3_sfcdata'
character(len=:),allocatable :: couplerres!='coupler.res'
integer (i_kind) ier,istatus
real(kind=8) :: time_beg,time_end,walltime
integer(i_kind) :: ierr


associate( this => this ) ! eliminates warning for unused dummy argument needed for binding
Expand All @@ -930,6 +948,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
couplerres=fv3_filenameginput%couplerres


!write(6,'("general_read_fv3_regional: fv3sar_ensemble_opt= " I4)') fv3sar_ensemble_opt


if (allocated(fv3lam_ens_io_dynmetvars2d_nouv) ) then
Expand All @@ -956,16 +975,28 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
end if


if(fv3sar_ensemble_opt == 0 ) then
if(fv3sar_ensemble_opt == 0 ) then
time_beg=MPI_Wtime()
call gsi_fv3ncdf_readuv(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_readuv" f15.4)') walltime

else
call gsi_fv3ncdf_readuv_v1(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
endif
if(fv3sar_ensemble_opt == 0) then
time_beg=MPI_Wtime()
call gsi_fv3ncdf_read(grd_fv3lam_ens_dynvar_io_nouv,gsibundle_fv3lam_ens_dynvar_nouv,&
fv3_filenameginput%dynvars,fv3_filenameginput,dual_res)
call gsi_fv3ncdf_read(grd_fv3lam_ens_tracer_io_nouv,gsibundle_fv3lam_ens_tracer_nouv,&
fv3_filenameginput%tracers,fv3_filenameginput,dual_res)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_read" f15.4)') walltime

if( if_model_dbz .or. if_model_fed ) then
call gsi_fv3ncdf_read(grd_fv3lam_ens_phyvar_io_nouv,gsibundle_fv3lam_ens_phyvar_nouv,&
fv3_filenameginput%phyvars,fv3_filenameginput,dual_res)
Expand Down Expand Up @@ -1013,6 +1044,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
endif

!! tsen2tv !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,g_q,g_tsen,g_tv,fv)
do k=1,grd_ens%nsig
do j=1,grd_ens%lon2
do i=1,grd_ens%lat2
Expand All @@ -1023,6 +1056,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
if (.not.q_hyb_ens) then
ice=.true.
iderivative=0
!$omp parallel do default(none),private(k,i,j,kp) &
!$omp shared(grd_ens,g_prsi,g_prsl)
do k=1,grd_ens%nsig
kp=k+1
do j=1,grd_ens%lon2
Expand All @@ -1032,6 +1067,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
end do
end do
call genqsat(g_rh,g_tsen(1,1,1),g_prsl(1,1,1),grd_ens%lat2,grd_ens%lon2,grd_ens%nsig,ice,iderivative)
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,g_rh,g_q)
do k=1,grd_ens%nsig
do j=1,grd_ens%lon2
do i=1,grd_ens%lat2
Expand All @@ -1051,6 +1088,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g


! CV transform
!$omp parallel do default(none),private(k,i,j) &
!$omp shared(grd_ens,l_use_cvpqx,g_qr,cvpqx_pval,g_qs,g_qg,g_qnr,cld_nt_updt,l_cvpnr,cvpnr_pval)
do k=1,grd_ens%nsig
do i=1,grd_ens%lon2
do j=1,grd_ens%lat2
Expand Down
23 changes: 23 additions & 0 deletions src/gsi/crc32.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module crc32
use iso_c_binding
implicit none
public :: digest

interface
integer function digest_c(message) bind(c)
use iso_c_binding, only: c_char
character(kind=c_char), intent(in) :: message(*)
end function digest_c
end interface

contains

integer function digest(m)
use iso_c_binding, only: c_null_char
implicit none
character(len=*), intent(in) :: m
!m='nid001019'
digest=abs(digest_c(trim(m)//c_null_char))
!write(6,'("Digest ",I12)') digest
end function digest
end module crc32
7 changes: 7 additions & 0 deletions src/gsi/crc32_c.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <stdio.h>
#include <string.h>
#include <zlib.h>

uLong digest_c(char * message) {
return crc32(0, (const void*)message, strlen(message));
}
13 changes: 13 additions & 0 deletions src/gsi/get_gefs_for_regional.f90
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ subroutine get_gefs_for_regional
use get_wrf_mass_ensperts_mod, only: get_wrf_mass_ensperts_class
use gsi_io, only: verbose
use obsmod, only: l_wcp_cwm
use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS
implicit none

type(sub2grid_info) grd_gfs,grd_mix,grd_gfst
Expand Down Expand Up @@ -187,6 +188,8 @@ subroutine get_gefs_for_regional
real(r_kind), pointer :: ges_q (:,:,:)=>NULL()
logical :: print_verbose
real(r_kind), allocatable :: ges_z_ens(:,:)
real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
integer(i_kind) :: ierr

print_verbose=.false.
if(verbose)print_verbose=.true.
Expand Down Expand Up @@ -621,6 +624,7 @@ subroutine get_gefs_for_regional

rewind(10)
inithead=.true.
tb=MPI_Wtime()
do n=1,n_ens_gfs
read(10,'(a)',err=20,end=20)filename
filename=trim(ensemble_path) // trim(filename)
Expand All @@ -641,8 +645,13 @@ subroutine get_gefs_for_regional
call general_read_gfsatm_nems(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,.true.,iret)
else if (use_gfs_ncio) then
time_beg=MPI_Wtime()
call general_read_gfsatm_nc(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,.true.,iret)
time_end=MPI_Wtime()
call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime for general_read_gfsatm_nc" f15.4)') walltime
else
call general_read_gfsatm(grd_gfst,sp_gfs,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
atm_bundle,inithead,iret)
Expand Down Expand Up @@ -955,6 +964,10 @@ subroutine get_gefs_for_regional
! if(mype==0) write(6,*)' with halo, n,min,max ges_ps - matt ps =',n,pdiffmin0,pdiffmax0

end do ! end loop over ensemble members.
te=MPI_Wtime()
call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_gfs,wt

deallocate(ges_z_ens)

Expand Down
2 changes: 2 additions & 0 deletions src/gsi/gsi_files.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
list(APPEND GSI_SRC_C
blockIO.c
crc32_c.c
)

# Class files for WRF interfaces
Expand Down Expand Up @@ -265,6 +266,7 @@ guess_grids.F90
half_nmm_grid2.f90
hdraobmod.f90
hilbert_curve.f90
crc32.F90
hybrid_ensemble_isotropic.F90
hybrid_ensemble_parameters.f90
inc2guess.f90
Expand Down
Loading
Loading