Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 143 additions & 21 deletions cesm/driver/ensemble_driver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ module Ensemble_driver

public :: SetServices
private :: SetModelServices
private :: ensemble_finalize

integer, allocatable :: asyncio_petlist(:)
logical :: asyncio_task=.false.
logical :: asyncIO_available=.false.

character(*),parameter :: u_FILE_u = &
__FILE__
Expand All @@ -26,9 +31,12 @@ module Ensemble_driver

subroutine SetServices(ensemble_driver, rc)

use NUOPC , only : NUOPC_CompDerive, NUOPC_CompSpecialize
use NUOPC , only : NUOPC_CompDerive, NUOPC_CompSpecialize, NUOPC_CompAttributeSet
use NUOPC , only : NUOPC_CompAttributeGet
use NUOPC_Driver , only : driver_routine_SS => SetServices
use NUOPC_Driver , only : ensemble_label_SetModelServices => label_SetModelServices
use NUOPC_Driver , only : ensemble_label_PostChildrenAdvertise => label_PostChildrenAdvertise
use NUOPC_Driver , only : label_Finalize
use ESMF , only : ESMF_GridComp, ESMF_GridCompSet
use ESMF , only : ESMF_Config, ESMF_ConfigCreate, ESMF_ConfigLoadFile
use ESMF , only : ESMF_SUCCESS, ESMF_LogWrite, ESMF_LOGMSG_INFO
Expand All @@ -38,6 +46,7 @@ subroutine SetServices(ensemble_driver, rc)

! local variables
type(ESMF_Config) :: config
logical :: isPresent
character(len=*), parameter :: subname = "(ensemble_driver.F90:SetServices)"
!---------------------------------------

Expand All @@ -53,6 +62,14 @@ subroutine SetServices(ensemble_driver, rc)
specRoutine=SetModelServices, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! ModifyCplLists is a NUOPC specialization which happens after Advertize but before Realize

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment should be changed to say "PostChildrenAdvertise is a..."

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

! We have overloaded this specialization location to initilize IO.
! So after all components have called Advertise but before any component calls Realize
! IO will be initialized and any async IO tasks will be split off to the PIO async IO driver.
call NUOPC_CompSpecialize(ensemble_driver, specLabel=ensemble_label_PostChildrenAdvertise, &
specRoutine=InitializeIO, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! Create, open and set the config
config = ESMF_ConfigCreate(rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
Expand All @@ -63,6 +80,26 @@ subroutine SetServices(ensemble_driver, rc)
call ESMF_GridCompSet(ensemble_driver, config=config, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! NUOPC component drivers end the initialization process with an internal call to InitializeDataResolution.
! The ensemble_driver does not need to InitializeDataResolution and doing so will cause a hang
! if asyncronous IO is used. This attribute is available after ESMF8.4.0b03 to toggle that control.
! Cannot use asyncIO with older ESMF versions.
call NUOPC_CompAttributeGet(ensemble_driver, name="InitializeDataResolution", &
isPresent=isPresent, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

if(isPresent) then
call ESMF_LogWrite(trim(subname)//": setting InitializeDataResolution false", ESMF_LOGMSG_INFO)
call NUOPC_CompAttributeSet(ensemble_driver, name="InitializeDataResolution", value="false", rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
asyncIO_available = .true.
call ESMF_LogWrite(trim(subname)//": asyncio is available", ESMF_LOGMSG_INFO)
endif
! Set a finalize method, it calls pio_finalize
call NUOPC_CompSpecialize(ensemble_driver, specLabel=label_Finalize, &
specRoutine=ensemble_finalize, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)

end subroutine SetServices
Expand Down Expand Up @@ -105,6 +142,13 @@ subroutine SetModelServices(ensemble_driver, rc)
integer :: inst
integer :: number_of_members
integer :: ntasks_per_member
integer :: currentpet
integer :: iopetcnt
integer :: petcnt
logical :: comp_task
integer :: pio_asyncio_ntasks
integer :: pio_asyncio_stride
integer :: pio_asyncio_rootpe
character(CL) :: start_type ! Type of startup
character(len=7) :: drvrinst
character(len=5) :: inst_suffix
Expand Down Expand Up @@ -187,13 +231,25 @@ subroutine SetModelServices(ensemble_driver, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) number_of_members

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_ntasks", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_ntasks

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_stride", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_stride

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_rootpe", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_rootpe

call ESMF_VMGet(vm, localPet=localPet, PetCount=PetCount, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

ntasks_per_member = PetCount/number_of_members
if(ntasks_per_member*number_of_members .ne. PetCount) then
ntasks_per_member = PetCount/number_of_members - pio_asyncio_ntasks
if(ntasks_per_member*number_of_members .ne. (PetCount - pio_asyncio_ntasks)) then
write (msgstr,'(a,i5,a,i3,a,i3,a)') &
"PetCount (",PetCount,") must be evenly divisable by number of members (",number_of_members,")"
"PetCount - Async IOtasks (",PetCount-pio_asyncio_ntasks,") must be evenly divisable by number of members (",number_of_members,")"
call ESMF_LogSetError(ESMF_RC_ARG_BAD, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
return
endif
Expand All @@ -203,23 +259,32 @@ subroutine SetModelServices(ensemble_driver, rc)
!-------------------------------------------

allocate(petList(ntasks_per_member))

allocate(asyncio_petlist(pio_asyncio_ntasks))
currentpet = 0
iopetcnt = 1
do inst=1,number_of_members

petcnt=1
comp_task = .false.
! Determine pet list for driver instance
petList(1) = (inst-1) * ntasks_per_member
do n=2,ntasks_per_member
petList(n) = petList(n-1) + 1
enddo

do n=1,ntasks_per_member+pio_asyncio_ntasks
if(pio_asyncio_stride == 0 .or. modulo(n,pio_asyncio_rootpe+1) .ne. 0) then
petList(petcnt) = currentpet
petcnt = petcnt+1
if (currentpet == localPet) comp_task=.true.
else
asyncio_petlist(iopetcnt) = currentpet
iopetcnt = iopetcnt + 1
if (currentpet == localPet) asyncio_task=.true.
endif

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does one of these blocks (I'm thinking the first one) apply when not using asyncio? If so, I think this would be more clear and robust if there was an explicit check on whether asyncio is being used, or at least a comment here explaining this.

Part of what I'm wondering about here is whether there is an assumption here - and maybe elsewhere - that, if you're not using asyncio, then pio_asyncio_ntasks, pio_asyncio_stride and pio_asyncio_rootpe are at their default values. I'm imagining a scenario where someone first enables asyncio and tweaks those settings, but then wants to try rerunning with asyncio disabled. Do they need to explicitly reset those three variables to their defaults in that situation? If so, that seems error-prone. Can this (and maybe some other code) be written to examine pio_async_interface and ignore those three variables if that is false?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you may be right here. I'll create a few scenarios to test this and put in a fix if needed.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's difficult to get to the pio_async_interface flags at this point in the fortran and I am thinking about implementing something in the buildnml instead - would that be acceptable?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, that sounds like a good plan - thanks. Then can you just add a comment here saying which of these blocks (if either) applies to the case without asyncio?

currentpet = currentpet + 1
enddo
! Add driver instance to ensemble driver
write(drvrinst,'(a,i4.4)') "ESM",inst
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=gridcomptmp, rc=rc)
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=driver, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

if (localpet >= petlist(1) .and. localpet <= petlist(ntasks_per_member)) then

driver = gridcomptmp
mastertask = .false.
if (comp_task) then

if(number_of_members > 1) then
call NUOPC_CompAttributeAdd(driver, attrList=(/'inst_suffix'/), rc=rc)
Expand Down Expand Up @@ -256,15 +321,13 @@ subroutine SetModelServices(ensemble_driver, rc)
mastertask = .true.
else
logUnit = 6
mastertask = .false.
endif
call shr_log_setLogUnit (logunit)

! Create a clock for each driver instance
call esm_time_clockInit(ensemble_driver, driver, logunit, mastertask, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

endif
! Create a clock for each driver instance
call esm_time_clockInit(ensemble_driver, driver, logunit, mastertask, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

enddo

deallocate(petList)
Expand All @@ -273,4 +336,63 @@ subroutine SetModelServices(ensemble_driver, rc)

end subroutine SetModelServices

subroutine InitializeIO(ensemble_driver, rc)
use ESMF, only: ESMF_GridComp, ESMF_LOGMSG_INFO, ESMF_LogWrite
use ESMF, only: ESMF_SUCCESS, ESMF_VM, ESMF_GridCompGet, ESMF_VMGet
use ESMF, only: ESMF_CONFIG, ESMF_GridCompIsPetLocal, ESMF_State, ESMF_Clock
use NUOPC, only: NUOPC_CompAttributeGet, NUOPC_CompGet
use NUOPC_DRIVER, only: NUOPC_DriverGetComp
use driver_pio_mod , only: driver_pio_init, driver_pio_component_init

type(ESMF_GridComp) :: ensemble_driver
type(ESMF_VM) :: ensemble_vm
integer, intent(out) :: rc
character(len=*), parameter :: subname = '('//__FILE__//':InitializeIO)'
type(ESMF_GridComp), pointer :: dcomp(:), ccomp(:)
integer :: iam
integer :: Global_Comm
integer :: drv, comp
character(len=8) :: compname

rc = ESMF_SUCCESS
call ESMF_LogWrite(trim(subname)//": called", ESMF_LOGMSG_INFO)

call ESMF_GridCompGet(ensemble_driver, vm=ensemble_vm, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_VMGet(ensemble_vm, localpet=iam, mpiCommunicator=Global_Comm, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

nullify(dcomp)
call NUOPC_DriverGetComp(ensemble_driver, complist=dcomp, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

do drv=1,size(dcomp)
if (ESMF_GridCompIsPetLocal(dcomp(drv), rc=rc) .or. asyncio_task) then
if (chkerr(rc,__LINE__,u_FILE_u)) return
call NUOPC_CompGet(dcomp(drv), name=compname, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call ESMF_LogWrite(trim(subname)//": call shr_pio_init "//compname, ESMF_LOGMSG_INFO)
call driver_pio_init(dcomp(drv), rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_LogWrite(trim(subname)//": call shr_pio_component_init "//compname, ESMF_LOGMSG_INFO)
call driver_pio_component_init(dcomp(drv), Global_Comm, asyncio_petlist, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call ESMF_LogWrite(trim(subname)//": shr_pio_component_init done "//compname, ESMF_LOGMSG_INFO)
endif
enddo
deallocate(asyncio_petlist)
call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)
end subroutine InitializeIO

subroutine ensemble_finalize(ensemble_driver, rc)
use ESMF, only : ESMF_GridComp, ESMF_SUCCESS
use driver_pio_mod, only: driver_pio_finalize
type(ESMF_GridComp) :: Ensemble_driver
integer, intent(out) :: rc
rc = ESMF_SUCCESS
call driver_pio_finalize()

end subroutine ensemble_finalize
end module Ensemble_driver
9 changes: 4 additions & 5 deletions cesm/driver/esm.F90
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ subroutine esm_init_pelayout(driver, maxthreads, rc)
use mpi , only : MPI_COMM_NULL, mpi_comm_size
#endif
use mct_mod , only : mct_world_init
use driver_pio_mod , only : driver_pio_init, driver_pio_component_init

#ifdef MED_PRESENT
use med_internalstate_mod , only : med_id
Expand Down Expand Up @@ -932,8 +931,8 @@ subroutine esm_init_pelayout(driver, maxthreads, rc)

! Initialize PIO
! This reads in the pio parameters that are independent of component
call driver_pio_init(driver, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
! call driver_pio_init(driver, rc=rc)
! if (chkerr(rc,__LINE__,u_FILE_u)) return

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to go ahead and remove these commented-out lines, along with the two comment lines above?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


allocate(comms(componentCount+1), comps(componentCount+1))
comps(1) = 1
Expand Down Expand Up @@ -1180,8 +1179,8 @@ subroutine esm_init_pelayout(driver, maxthreads, rc)
enddo
! Read in component dependent PIO parameters and initialize
! IO systems
call driver_pio_component_init(driver, size(comps), rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
! call driver_pio_component_init(driver, size(comps), rc)
! if (chkerr(rc,__LINE__,u_FILE_u)) return

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to go ahead and remove these commented-out lines, along with the two comment lines above?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


! Initialize MCT (this is needed for data models and cice prescribed capability)
call mct_world_init(componentCount+1, GLOBAL_COMM, comms, comps)
Expand Down
Loading