diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..3ad04d6a
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,60 @@
+if(32BIT)
+remove_definitions(-DOVERLOAD_R4)
+remove_definitions(-DOVERLOAD_R8)
+message ("Force 64 bits in stochastic_physics")
+if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
+    if(REPRO)
+        string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    else()
+        string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    endif()
+elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
+endif()
+endif()
+
+add_library(
+    stochastic_physics
+
+    ./kinddef.F90
+    ./mpi_wrapper.F90
+    ./halo_exchange.fv3.F90
+    ./plumes.f90
+
+    ./stochy_gg_def.f
+    ./stochy_resol_def.f
+    ./stochy_layout_lag.f
+    ./four_to_grid_stochy.F
+    ./glats_stochy.f
+    ./sumfln_stochy.f
+    ./gozrineo_stochy.f
+    ./num_parthds_stochy.f
+    ./get_ls_node_stochy.f
+    ./get_lats_node_a_stochy.f
+    ./setlats_a_stochy.f
+    ./setlats_lag_stochy.f
+    ./epslon_stochy.f
+    ./getcon_lag_stochy.f
+    ./pln2eo_stochy.f
+    ./dozeuv_stochy.f
+    ./dezouv_stochy.f
+    ./mersenne_twister.F
+
+    ./spectral_layout.F90
+    ./getcon_spectral.F90
+    ./stochy_namelist_def.F90
+    ./compns_stochy.F90
+    ./stochy_internal_state_mod.F90
+    ./stochastic_physics.F90
+    ./stochy_patterngenerator.F90
+    ./stochy_data_mod.F90
+    ./get_stochy_pattern.F90
+    ./initialize_spectral_mod.F90
+    ./cellular_automata_global.F90
+    ./cellular_automata_sgs.F90
+    ./update_ca.F90
+)
+
+target_link_libraries(stochastic_physics sp::sp_d)
+target_link_libraries(stochastic_physics fms)
+
diff --git a/cellular_automata_global.F90 b/cellular_automata_global.F90
index be2dcb96..46724495 100644
--- a/cellular_automata_global.F90
+++ b/cellular_automata_global.F90
@@ -1,23 +1,22 @@
-subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
+module cellular_automata_global_mod
+
+implicit none
+
+contains
+
+subroutine cellular_automata_global(kstep,ca1_cpl,ca2_cpl,ca3_cpl,ca1_diag,ca2_diag,ca3_diag, &
+            domain_for_coupler,nblks,isc,iec,jsc,jec,npx,npy,nlev, &
             nca,ncells,nlives,nfracseed,nseed,nthresh,ca_global,ca_sgs,iseed_ca, &
-            ca_smooth,nspinup,blocksize,nsmooth,ca_amplitude)
+            ca_smooth,nspinup,blocksize,nsmooth,ca_amplitude,mpiroot,mpicomm)
 
-use machine
+use kinddef,           only: kind_phys
 use update_ca,         only: update_cells_sgs, update_cells_global
-#ifdef STOCHY_UNIT_TEST
-use standalone_stochy_module,      only: GFS_Coupling_type, GFS_diag_type, GFS_statein_type
-use atmosphere_stub_mod,    only: atmosphere_resolution, atmosphere_domain, &
-                                   atmosphere_scalar_field_halo, atmosphere_control_data
-#else
-use GFS_typedefs,      only: GFS_Coupling_type, GFS_diag_type, GFS_statein_type
-use atmosphere_mod,    only: atmosphere_resolution, atmosphere_domain, &
-                             atmosphere_scalar_field_halo, atmosphere_control_data
-#endif
+use halo_exchange,     only: atmosphere_scalar_field_halo
 use mersenne_twister,  only: random_setseed,random_gauss,random_stat,random_number
 use mpp_domains_mod,   only: domain2D
 use block_control_mod, only: block_control_type, define_blocks_packed
-use fv_mp_mod,         only : mp_reduce_sum,mp_bcst,mp_reduce_max,mp_reduce_min,is_master
-use mpp_mod, only : mpp_pe
+use mpi_wrapper,       only: mype,mp_reduce_sum,mp_bcst,mp_reduce_max,mp_reduce_min, &
+                             mpi_wrapper_initialize
 
 
 implicit none
@@ -27,19 +26,20 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 !This program evolves a cellular automaton uniform over the globe given
 !the flag ca_global
 
-integer,intent(in) :: kstep,ncells,nca,nlives,nseed,nspinup,nsmooth
-integer,intent(in) :: iseed_ca
-real,intent(in) :: nfracseed,nthresh,ca_amplitude
-logical,intent(in) :: ca_global, ca_sgs, ca_smooth
-integer, intent(in) :: nblks,nlev,blocksize
-type(GFS_coupling_type),intent(inout) :: Coupling(nblks)
-type(GFS_diag_type),intent(inout) :: Diag(nblks)
-type(GFS_statein_type),intent(in) :: Statein(nblks)
-type(block_control_type)          :: Atm_block
+integer,              intent(in)    :: kstep,ncells,nca,nlives,nseed,nspinup,nsmooth,mpiroot,mpicomm
+integer,              intent(in)    :: iseed_ca
+real(kind=kind_phys), intent(in)    :: nfracseed,nthresh,ca_amplitude
+logical,              intent(in)    :: ca_global, ca_sgs, ca_smooth
+integer,              intent(in)    :: nblks,isc,iec,jsc,jec,npx,npy,nlev,blocksize
+real(kind=kind_phys), intent(out)   :: ca1_cpl(:,:),ca2_cpl(:,:),ca3_cpl(:,:)
+real(kind=kind_phys), intent(out)   :: ca1_diag(:,:),ca2_diag(:,:),ca3_diag(:,:)
+type(domain2D),       intent(inout) :: domain_for_coupler
+    
+type(block_control_type) :: Atm_block
 type(random_stat) :: rstate
 integer :: nlon, nlat, isize,jsize,nf,nn
 integer :: inci, incj, nxc, nyc, nxch, nych
-integer :: halo, k_in, i, j, k, iec, jec, isc, jsc
+integer :: halo, k_in, i, j, k
 integer :: seed, ierr7,blk, ix, iix, count4,ih,jh
 integer :: blocksz,levs
 integer(8) :: count, count_rate, count_max, count_trunc
@@ -55,15 +55,19 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 !nca         :: switch for number of cellular automata to be used.
 !ca_global   :: switch for global cellular automata
-!ca_sgs      :: switch for cellular automata conditioned on SGS perturbed vertvel. 
+!ca_sgs      :: switch for cellular automata conditioned on SGS perturbed vertvel.
 !nfracseed   :: switch for number of random cells initially seeded
 !nlives      :: switch for maximum number of lives a cell can have
 !nspinup     :: switch for number of itterations to spin up the ca
-!ncells      :: switch for higher resolution grid e.g ncells=4 
-!               gives 4x4 times the FV3 model grid resolution.                
+!ncells      :: switch for higher resolution grid e.g ncells=4
+!               gives 4x4 times the FV3 model grid resolution.
 !ca_smooth   :: switch to smooth the cellular automata
 !nthresh     :: threshold of perturbed vertical velocity used in case of sgs
 
+! Initialize MPI and OpenMP
+if (kstep==0) then
+   call mpi_wrapper_initialize(mpiroot,mpicomm)
+end if
 
 halo=1
 k_in=1
@@ -90,19 +94,17 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 ! stop
 ! endif
 
-
- call atmosphere_resolution (nlon, nlat, global=.false.)
+ nlon=iec-isc+1
+ nlat=jec-jsc+1
  isize=nlon+2*halo
  jsize=nlat+2*halo
- !nlon,nlat is the compute domain - without haloes       
- !mlon,mlat is the cubed-sphere tile size. 
 
  inci=ncells
  incj=ncells
- 
+
  nxc=nlon*ncells
  nyc=nlat*ncells
- 
+
  nxch=nxc+2*halo
  nych=nyc+2*halo
 
@@ -121,32 +123,27 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
  allocate(CA3(nlon,nlat))
  allocate(noise(nxc,nyc,nca))
  allocate(noise1D(nxc*nyc))
-  
+
  !Initialize:
- 
- noise(:,:,:) = 0.0 
+
+ noise(:,:,:) = 0.0
  noise1D(:) = 0.0
  iini_g(:,:,:) = 0
  ilives_g(:,:) = 0
  CA1(:,:) = 0.0
- CA2(:,:) = 0.0 
+ CA2(:,:) = 0.0
  CA3(:,:) = 0.0
- 
-!Put the blocks of model fields into a 2d array
+
+ !Put the blocks of model fields into a 2d array - can't use nlev and blocksize directly,
+ !because the arguments to define_blocks_packed are intent(inout) and not intent(in).
  levs=nlev
  blocksz=blocksize
 
- call atmosphere_control_data(isc, iec, jsc, jec, levs)
  call define_blocks_packed('cellular_automata', Atm_block, isc, iec, jsc, jec, levs, &
                               blocksz, block_message)
 
-  isc = Atm_block%isc
-  iec = Atm_block%iec
-  jsc = Atm_block%jsc
-  jec = Atm_block%jec 
-
 !Generate random number, following stochastic physics code:
-if(kstep==0) then
+!if(kstep==0) then
   if (iseed_ca == 0) then
     ! generate a random seed from system clock and ens member number
     call system_clock(count, count_rate, count_max)
@@ -157,9 +154,9 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
   else if (iseed_ca > 0) then
     ! don't rely on compiler to truncate integer(8) to integer(4) on
     ! overflow, do wrap around explicitly.
-    count4 = mod(mpp_pe() + iseed_ca + 2147483648, 4294967296) - 2147483648 
+    count4 = mod(mype + iseed_ca + 2147483648, 4294967296) - 2147483648
   endif
-endif !kstep == 0
+!endif !kstep == 0
 
   call random_setseed(count4,rstate)
   do nf=1,nca
@@ -187,7 +184,7 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
     enddo
   enddo !nf
 endif ! kstep==0
- 
+
 !In case we want to condition the cellular automaton on a large scale field
 !we here set the "condition" variable to a different model field depending
 !on nf. (this is not used if ca_global = .true.)
@@ -201,21 +198,22 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
     enddo
    enddo
 
- 
 
-!Calculate neighbours and update the automata                                                                                                                                                            
-!If ca-global is used, then nca independent CAs are called and weighted together to create one field; CA                                                                                                                             
+
+!Calculate neighbours and update the automata
+!If ca-global is used, then nca independent CAs are called and weighted together to create one field; CA
 
 
-  CA(:,:)=0. 
+  CA(:,:)=0.
+
+  call update_cells_global(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,isc,iec,jsc,jec, &
+                           npx,npy,domain_for_coupler,CA,iini_g,ilives_g,         &
+                           nlives,ncells,nfracseed,nseed,nthresh,nspinup,nf)
 
-  call update_cells_global(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,iini_g,ilives_g, &
-                   nlives, ncells, nfracseed, nseed,nthresh, nspinup,nf) 
 
-   
 if (ca_smooth) then
 
-do nn=1,nsmooth !number of itterations for the smoothing. 
+do nn=1,nsmooth !number of iterations for the smoothing.
 
 field_in=0.
 
@@ -228,13 +226,13 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 field_out=0.
 
-call atmosphere_scalar_field_halo(field_out,halo,isize,jsize,k_in,field_in)
+call atmosphere_scalar_field_halo(field_out,halo,isize,jsize,k_in,field_in,isc,iec,jsc,jec,npx,npy,domain_for_coupler)
 
 do j=1,nlat
  do i=1,nlon
     ih=i+halo
     jh=j+halo
-    field_smooth(i,j)=(2.0*field_out(ih,jh,1)+2.0*field_out(ih-1,jh,1)+ & 
+    field_smooth(i,j)=(2.0*field_out(ih,jh,1)+2.0*field_out(ih-1,jh,1)+ &
                        2.0*field_out(ih,jh-1,1)+2.0*field_out(ih+1,jh,1)+&
                        2.0*field_out(ih,jh+1,1)+2.0*field_out(ih-1,jh-1,1)+&
                        2.0*field_out(ih-1,jh+1,1)+2.0*field_out(ih+1,jh+1,1)+&
@@ -281,23 +279,23 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 CAmean=psum/csum
 
 !std:
-CAstdv=0.                                                                                                                                                                                             
-sq_diff = 0.                                                                                                                                                                                          
-do j=1,nlat                                                                                                                                                                                           
- do i=1,nlon                                                                                                                                                                                          
-  sq_diff = sq_diff + (CA(i,j)-CAmean)**2.0                                                                                                                                                        
- enddo                                                                                                                                                                                                
-enddo                                                                                                                                                                                                 
+CAstdv=0.
+sq_diff = 0.
+do j=1,nlat
+ do i=1,nlon
+  sq_diff = sq_diff + (CA(i,j)-CAmean)**2.0
+ enddo
+enddo
 
-call mp_reduce_sum(sq_diff)                                                                                                                                                                           
+call mp_reduce_sum(sq_diff)
 
-CAstdv = sqrt(sq_diff/csum)                                                                                                                                                                 
+CAstdv = sqrt(sq_diff/csum)
 
 !Transform to mean of 1 and ca_amplitude standard deviation
 
 do j=1,nlat
  do i=1,nlon
-  CA(i,j)=1.0 + (CA(i,j)-CAmean)*(ca_amplitude/CAstdv)  
+  CA(i,j)=1.0 + (CA(i,j)-CAmean)*(ca_amplitude/CAstdv)
  enddo
 enddo
 
@@ -313,26 +311,26 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
 CA(:,:)=1.
 endif
 
-    if(nf==1)then                                                                                                                                                                                                                          
-    CA1(:,:)=CA(:,:)                                                                                                                                                                                                                       
-    elseif(nf==2)then                                                                                                                                                                                                                      
-    CA2(:,:)=CA(:,:)                                                                                                                                                                                                                       
-    else                                                                                                                                                                                                                                   
-    CA3(:,:)=CA(:,:)                                                                                                                                                                                                                       
-    endif  
+    if(nf==1)then
+    CA1(:,:)=CA(:,:)
+    elseif(nf==2)then
+    CA2(:,:)=CA(:,:)
+    else
+    CA3(:,:)=CA(:,:)
+    endif
 
 enddo !nf
-  
+
   do blk = 1, Atm_block%nblks
   do ix = 1,Atm_block%blksz(blk)
      i = Atm_block%index(blk)%ii(ix) - isc + 1
      j = Atm_block%index(blk)%jj(ix) - jsc + 1
-     Diag(blk)%ca1(ix)=CA1(i,j)
-     Diag(blk)%ca2(ix)=CA2(i,j)
-     Diag(blk)%ca3(ix)=CA3(i,j)
-     Coupling(blk)%ca1(ix)=CA1(i,j)
-     Coupling(blk)%ca2(ix)=CA2(i,j)
-     Coupling(blk)%ca3(ix)=CA3(i,j)
+     ca1_diag(blk,ix)=CA1(i,j)
+     ca2_diag(blk,ix)=CA2(i,j)
+     ca3_diag(blk,ix)=CA3(i,j)
+     ca1_cpl(blk,ix)=CA1(i,j)
+     ca2_cpl(blk,ix)=CA2(i,j)
+     ca3_cpl(blk,ix)=CA3(i,j)
   enddo
   enddo
 
@@ -350,3 +348,5 @@ subroutine cellular_automata_global(kstep,Statein,Coupling,Diag,nblks,nlev, &
  deallocate(noise1D)
 
 end subroutine cellular_automata_global
+
+end module cellular_automata_global_mod
diff --git a/cellular_automata_sgs.F90 b/cellular_automata_sgs.F90
index 035ca9ad..36af1c81 100644
--- a/cellular_automata_sgs.F90
+++ b/cellular_automata_sgs.F90
@@ -1,23 +1,22 @@
-subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
+module cellular_automata_sgs_mod
+
+implicit none
+
+contains
+
+subroutine cellular_automata_sgs(kstep,ugrs,qgrs,pgr,vvl,prsl,condition_cpl, &
+            ca_deep_cpl,ca_turb_cpl,ca_shal_cpl,ca_deep_diag,ca_turb_diag,ca_shal_diag,domain_for_coupler, &
+            nblks,isc,iec,jsc,jec,npx,npy,nlev, &
             nca,ncells,nlives,nfracseed,nseed,nthresh,ca_global,ca_sgs,iseed_ca, &
-            ca_smooth,nspinup,blocksize)
+            ca_smooth,nspinup,blocksize,mpiroot, mpicomm)
 
-use machine
+use kinddef,           only: kind_phys
 use update_ca,         only: update_cells_sgs, update_cells_global
-#ifdef STOCHY_UNIT_TEST
-use standalone_stochy_module,      only: GFS_Coupling_type, GFS_diag_type, GFS_statein_type
-use atmosphere_stub_mod,    only: atmosphere_resolution, atmosphere_domain, &
-                                   atmosphere_scalar_field_halo, atmosphere_control_data
-#else
-use GFS_typedefs,      only: GFS_Coupling_type, GFS_diag_type, GFS_statein_type
-use atmosphere_mod,    only: atmosphere_resolution, atmosphere_domain, &
-                             atmosphere_scalar_field_halo, atmosphere_control_data
-#endif
 use mersenne_twister,  only: random_setseed,random_gauss,random_stat,random_number
 use mpp_domains_mod,   only: domain2D
 use block_control_mod, only: block_control_type, define_blocks_packed
-use fv_mp_mod,         only : mp_reduce_sum,mp_bcst,mp_reduce_max,mp_reduce_min,is_master
-use mpp_mod, only : mpp_pe
+use mpi_wrapper,       only: mype,mp_reduce_sum,mp_bcst,mp_reduce_max,mp_reduce_min, &
+                             mpi_wrapper_initialize
 
 
 implicit none
@@ -25,31 +24,42 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 !L.Bengtsson, 2017-06
 
 !This program evolves a cellular automaton uniform over the globe given
-!the flag ca_global, if instead ca_sgs is .true. it evolves a cellular automata conditioned on 
-!perturbed grid-box mean field. The perturbations to the mean field are given by a 
+!the flag ca_global, if instead ca_sgs is .true. it evolves a cellular automata conditioned on
+!perturbed grid-box mean field. The perturbations to the mean field are given by a
 !stochastic gaussian skewed (SGS) distribution.
 
 !If ca_global is .true. it weighs the number of ca (nca) together to produce 1 output pattern
-!If instead ca_sgs is given, it produces nca ca:                                                                                                                                                                            
-! 1 CA_DEEP = deep convection                                                                                                                                                                              
-! 2 CA_SHAL = shallow convection                                                                                                                                                                             
-! 3 CA_TURB = turbulence                                                                                                                                                                                        
+!If instead ca_sgs is given, it produces nca ca:
+! 1 CA_DEEP = deep convection
+! 2 CA_SHAL = shallow convection
+! 3 CA_TURB = turbulence
 
-!PLEASE NOTE: This is considered to be version 0 of the cellular automata code for FV3GFS, some functionally 
-!is missing/limited. 
+!PLEASE NOTE: This is considered to be version 0 of the cellular automata code for FV3GFS, some functionally
+!is missing/limited.
 
-integer,intent(in) :: kstep,ncells,nca,nlives,nseed,iseed_ca,nspinup
-real,intent(in) :: nfracseed,nthresh
+integer,intent(in) :: kstep,ncells,nca,nlives,nseed,iseed_ca,nspinup,mpiroot,mpicomm
+real(kind=kind_phys), intent(in)    :: nfracseed,nthresh
 logical,intent(in) :: ca_global, ca_sgs, ca_smooth
-integer, intent(in) :: nblks,nlev,blocksize
-type(GFS_coupling_type),intent(inout) :: Coupling(nblks)
-type(GFS_diag_type),intent(inout) :: Diag(nblks)
-type(GFS_statein_type),intent(in) :: Statein(nblks)
+integer, intent(in) :: nblks,isc,iec,jsc,jec,npx,npy,nlev,blocksize
+real(kind=kind_phys), intent(in)    :: ugrs(:,:,:)
+real(kind=kind_phys), intent(in)    :: qgrs(:,:,:)
+real(kind=kind_phys), intent(in)    :: pgr(:,:)
+real(kind=kind_phys), intent(in)    :: vvl(:,:,:)
+real(kind=kind_phys), intent(in)    :: prsl(:,:,:)
+real(kind=kind_phys), intent(inout) :: condition_cpl(:,:)
+real(kind=kind_phys), intent(inout) :: ca_deep_cpl(:,:)
+real(kind=kind_phys), intent(inout) :: ca_turb_cpl(:,:)
+real(kind=kind_phys), intent(inout) :: ca_shal_cpl(:,:)
+real(kind=kind_phys), intent(out)   :: ca_deep_diag(:,:)
+real(kind=kind_phys), intent(out)   :: ca_turb_diag(:,:)
+real(kind=kind_phys), intent(out)   :: ca_shal_diag(:,:)
+type(domain2D), intent(inout)       :: domain_for_coupler
+
 type(block_control_type)          :: Atm_block
 type(random_stat) :: rstate
 integer :: nlon, nlat, isize,jsize,nf,nn
 integer :: inci, incj, nxc, nyc, nxch, nych
-integer :: halo, k_in, i, j, k, iec, jec, isc, jsc
+integer :: halo, k_in, i, j, k
 integer :: seed, ierr7,blk, ix, iix, count4,ih,jh
 integer :: blocksz,levs,k350,k850
 integer(8) :: count, count_rate, count_max, count_trunc
@@ -68,16 +78,21 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 !nca         :: switch for number of cellular automata to be used.
 !ca_global   :: switch for global cellular automata
-!ca_sgs      :: switch for cellular automata conditioned on SGS perturbed vertvel. 
+!ca_sgs      :: switch for cellular automata conditioned on SGS perturbed vertvel.
 !nfracseed   :: switch for number of random cells initially seeded
 !nlives      :: switch for maximum number of lives a cell can have
 !nspinup     :: switch for number of itterations to spin up the ca
-!ncells      :: switch for higher resolution grid e.g ncells=4 
-!               gives 4x4 times the FV3 model grid resolution.                
+!ncells      :: switch for higher resolution grid e.g ncells=4
+!               gives 4x4 times the FV3 model grid resolution.
 !ca_smooth   :: switch to smooth the cellular automata
 !nthresh     :: threshold of perturbed vertical velocity used in case of sgs
 !nca_plumes   :: compute number of CA-cells ("plumes") within a NWP gridbox.
 
+! Initialize MPI and OpenMP
+if (kstep==0) then
+   call mpi_wrapper_initialize(mpiroot,mpicomm)
+end if
+
 halo=1
 k_in=1
 
@@ -104,28 +119,28 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  stop
  endif
 
- call atmosphere_resolution (nlon, nlat, global=.false.)
+ nlon=iec-isc+1
+ nlat=jec-jsc+1
  isize=nlon+2*halo
  jsize=nlat+2*halo
 
  inci=ncells
  incj=ncells
- 
+
  nxc=nlon*ncells
  nyc=nlat*ncells
- 
+
  nxch=nxc+2*halo
  nych=nyc+2*halo
 
  !Allocate fields:
- levs=nlev
 
  allocate(cloud(nlon,nlat))
- allocate(omega(nlon,nlat,levs))
- allocate(pressure(nlon,nlat,levs))
+ allocate(omega(nlon,nlat,nlev))
+ allocate(pressure(nlon,nlat,nlev))
  allocate(humidity(nlon,nlat))
  allocate(uwind(nlon,nlat))
- allocate(dp(nlon,nlat,levs))
+ allocate(dp(nlon,nlat,nlev))
  allocate(rho(nlon,nlat))
  allocate(surfp(nlon,nlat))
  allocate(vertvelmean(nlon,nlat))
@@ -146,16 +161,16 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  allocate(CA(nlon,nlat))
  allocate(ca_plumes(nlon,nlat))
  allocate(CA_TURB(nlon,nlat))
- allocate(CA_DEEP(nlon,nlat)) 
+ allocate(CA_DEEP(nlon,nlat))
  allocate(CA_SHAL(nlon,nlat))
  allocate(noise(nxc,nyc,nca))
  allocate(noise1D(nxc*nyc))
-  
+
  !Initialize:
  Detfield(:,:,:)=0.
  vertvelmean(:,:) =0.
  vertvelsum(:,:)=0.
- cloud(:,:)=0. 
+ cloud(:,:)=0.
  humidity(:,:)=0.
  uwind(:,:) = 0.
  condition(:,:)=0.
@@ -171,32 +186,27 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  Detmax(:)=0.
  Detmin(:)=0.
 
-!Put the blocks of model fields into a 2d array
+ !Put the blocks of model fields into a 2d array - can't use nlev and blocksize directly,
+ !because the arguments to define_blocks_packed are intent(inout) and not intent(in).
  levs=nlev
  blocksz=blocksize
 
- call atmosphere_control_data(isc, iec, jsc, jec, levs)
  call define_blocks_packed('cellular_automata', Atm_block, isc, iec, jsc, jec, levs, &
                               blocksz, block_message)
 
-  isc = Atm_block%isc
-  iec = Atm_block%iec
-  jsc = Atm_block%jsc
-  jec = Atm_block%jec 
-
  do blk = 1,Atm_block%nblks
   do ix = 1, Atm_block%blksz(blk)
       i = Atm_block%index(blk)%ii(ix) - isc + 1
       j = Atm_block%index(blk)%jj(ix) - jsc + 1
-      uwind(i,j) = Statein(blk)%ugrs(ix,k350)
-      conditiongrid(i,j) = Coupling(blk)%condition(ix) 
-      shalp(i,j) = Coupling(blk)%ca_shal(ix)
-      gamt(i,j) = Coupling(blk)%ca_turb(ix)
-      surfp(i,j) = Statein(blk)%pgr(ix)
-      humidity(i,j)=Statein(blk)%qgrs(ix,k850,1) !about 850 hpa
+      uwind(i,j)         = ugrs(blk,ix,k350)
+      conditiongrid(i,j) = condition_cpl(blk,ix)
+      shalp(i,j)         = ca_shal_cpl(blk,ix)
+      gamt(i,j)          = ca_turb_cpl(blk,ix)
+      surfp(i,j)         = pgr(blk,ix)
+      humidity(i,j)      = qgrs(blk,ix,k850) !about 850 hpa
       do k = 1,k350 !Lower troposphere
-      omega(i,j,k) = Statein(blk)%vvl(ix,k) ! layer mean vertical velocity in pa/sec
-      pressure(i,j,k) = Statein(blk)%prsl(ix,k) ! layer mean pressure in Pa
+      omega(i,j,k)       = vvl(blk,ix,k) ! layer mean vertical velocity in pa/sec
+      pressure(i,j,k)    = prsl(blk,ix,k) ! layer mean pressure in Pa
       enddo
   enddo
  enddo
@@ -229,19 +239,18 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  enddo
 
 !Generate random number, following stochastic physics code:
-
-if(kstep==2) then
+!if(kstep==2) then
   if (iseed_ca == 0) then
     ! generate a random seed from system clock and ens member number
     call system_clock(count, count_rate, count_max)
     ! iseed is elapsed time since unix epoch began (secs)
     ! truncate to 4 byte integer
     count_trunc = iscale*(count/iscale)
-    count4 = count - count_trunc 
+    count4 = count - count_trunc
   else
     ! don't rely on compiler to truncate integer(8) to integer(4) on
     ! overflow, do wrap around explicitly.
-    count4 = mod(mpp_pe() + iseed_ca + 2147483648, 4294967296) - 2147483648 
+    count4 = mod(mype + iseed_ca + 2147483648, 4294967296) - 2147483648
   endif
 
   call random_setseed(count4)
@@ -257,7 +266,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 
 !Initiate the cellular automaton with random numbers larger than nfracseed
- 
+
     do j = 1,nyc
       do i = 1,nxc
         if (noise(i,j,nf) > nfracseed ) then
@@ -267,24 +276,24 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
         endif
       enddo
     enddo
- 
+
   enddo !nf
-endif ! kstep=0
- 
+!endif ! kstep=0
+
 !In case we want to condition the cellular automaton on a large scale field
 !we here set the "condition" variable to a different model field depending
 !on nf. (this is not used if ca_global = .true.)
 
 
 do nf=1,nca !update each ca
- 
-  
-  if(nf==1)then 
+
+
+  if(nf==1)then
   inci=ncells
   incj=ncells
   do j=1,nyc
    do i=1,nxc
-     condition(i,j)=conditiongrid(inci/ncells,incj/ncells) 
+     condition(i,j)=conditiongrid(inci/ncells,incj/ncells)
      if(i.eq.inci)then
      inci=inci+ncells
      endif
@@ -299,7 +308,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
   call mp_reduce_max(condmax)
 
    do j = 1,nyc
-    do i = 1,nxc  
+    do i = 1,nxc
       ilives(i,j,nf)=real(nlives)*(condition(i,j)/condmax)
     enddo
    enddo
@@ -357,7 +366,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
  endif !nf
 
-  
+
 !Vertical velocity has its own variable in order to condition on combination
 !of "condition" and vertical velocity.
 
@@ -376,11 +385,12 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
    endif
   enddo
 
-!Calculate neighbours and update the automata                                                                                                                                                            
-!If ca-global is used, then nca independent CAs are called and weighted together to create one field; CA                                                                                                                                                                                                                                  
-  
-  call update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,iini,ilives, &
-                   nlives, ncells, nfracseed, nseed,nthresh,nspinup,nf,nca_plumes)
+!Calculate neighbours and update the automata
+!If ca-global is used, then nca independent CAs are called and weighted together to create one field; CA
+
+  call update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,isc,iec,jsc,jec, &
+                   npx,npy,domain_for_coupler,CA,ca_plumes,iini,ilives,        &
+                   nlives,ncells,nfracseed,nseed,nthresh,nspinup,nf,nca_plumes)
 
    if(nf==1)then
     CA_DEEP(:,:)=CA(:,:)
@@ -414,7 +424,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  enddo
 enddo
 
-!Compute the mean of the new range and subtract 
+!Compute the mean of the new range and subtract
 CAmean=0.
 psum=0.
 csum=0.
@@ -445,7 +455,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 !Shallow convection ============================================================
 
-!Use min-max method to normalize range                                                                                                                                                                                                            
+!Use min-max method to normalize range
 Detmax(2)=maxval(CA_SHAL,CA_SHAL.NE.0)
 call mp_reduce_max(Detmax(2))
 Detmin(2)=minval(CA_SHAL,CA_SHAL.NE.0)
@@ -459,7 +469,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  enddo
 enddo
 
-!Compute the mean of the new range and subtract                                                                                                                                                                                                   
+!Compute the mean of the new range and subtract
 CAmean=0.
 psum=0.
 csum=0.
@@ -487,7 +497,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 !Turbulence =============================================================================
 
-!Use min-max method to normalize range                                                                                                                                                                                                            
+!Use min-max method to normalize range
 Detmax(3)=maxval(CA_TURB,CA_TURB.NE.0)
 call mp_reduce_max(Detmax(3))
 Detmin(3)=minval(CA_TURB,CA_TURB.NE.0)
@@ -501,7 +511,7 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  enddo
 enddo
 
-!Compute the mean of the new range and subtract                                                                                                                                                                                                   
+!Compute the mean of the new range and subtract
 CAmean=0.
 psum=0.
 csum=0.
@@ -548,17 +558,17 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
 
 !Put back into blocks 1D array to be passed to physics
 !or diagnostics output
-  
+
   do blk = 1, Atm_block%nblks
   do ix = 1,Atm_block%blksz(blk)
      i = Atm_block%index(blk)%ii(ix) - isc + 1
      j = Atm_block%index(blk)%jj(ix) - jsc + 1
-     Diag(blk)%ca_deep(ix)=ca_plumes(i,j)
-     Diag(blk)%ca_turb(ix)=conditiongrid(i,j)
-     Diag(blk)%ca_shal(ix)=CA_SHAL(i,j)
-     Coupling(blk)%ca_deep(ix)=ca_plumes(i,j)
-     Coupling(blk)%ca_turb(ix)=CA_TURB(i,j)
-     Coupling(blk)%ca_shal(ix)=CA_SHAL(i,j)
+     ca_deep_diag(blk,ix)=ca_plumes(i,j)
+     ca_turb_diag(blk,ix)=conditiongrid(i,j)
+     ca_shal_diag(blk,ix)=CA_SHAL(i,j)
+     ca_deep_cpl(blk,ix)=ca_plumes(i,j)
+     ca_turb_cpl(blk,ix)=CA_TURB(i,j)
+     ca_shal_cpl(blk,ix)=CA_SHAL(i,j)
   enddo
   enddo
 
@@ -590,3 +600,5 @@ subroutine cellular_automata_sgs(kstep,Statein,Coupling,Diag,nblks,nlev, &
  deallocate(noise1D)
 
 end subroutine cellular_automata_sgs
+
+end module cellular_automata_sgs_mod
diff --git a/dezouv_stochy.f b/dezouv_stochy.f
index 9707a006..25e896c2 100644
--- a/dezouv_stochy.f
+++ b/dezouv_stochy.f
@@ -16,7 +16,7 @@ subroutine dezouv_stochy(dev,zod,uev,vod,epsedn,epsodn,
 cc
       use stochy_resol_def
       use spectral_layout_mod
-      use machine
+      use kinddef
       implicit none
 cc
       real(kind_dbl_prec)     dev(len_trie_ls,2)
diff --git a/dozeuv_stochy.f b/dozeuv_stochy.f
index 03e27ce9..e67821c9 100644
--- a/dozeuv_stochy.f
+++ b/dozeuv_stochy.f
@@ -14,7 +14,7 @@ subroutine dozeuv_stochy(dod,zev,uod,vev,epsedn,epsodn,
 cc
       use stochy_resol_def
       use spectral_layout_mod
-      use machine
+      use kinddef
       implicit none
 cc
       real(kind_dbl_prec)     dod(len_trio_ls,2)
diff --git a/epslon_stochy.f b/epslon_stochy.f
index 06acfea1..55ef0d17 100644
--- a/epslon_stochy.f
+++ b/epslon_stochy.f
@@ -12,7 +12,7 @@ subroutine epslon_stochy(epse,epso,epsedn,epsodn,
 cc
       use stochy_resol_def
       use spectral_layout_mod
-      use machine
+      use kinddef
       implicit none
 cc
       real(kind_dbl_prec)   epse(len_trie_ls)
diff --git a/four_to_grid_stochy.F b/four_to_grid_stochy.F
index 053e05a2..358be211 100644
--- a/four_to_grid_stochy.F
+++ b/four_to_grid_stochy.F
@@ -12,7 +12,7 @@ module four_to_grid_mod
       subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
      &                        lon_dim_coef,lon_dim_grid,lons_lat,lot)
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      use machine
+      use kinddef
       implicit none
 !!
       real(kind=kind_dbl_prec)     syn_gr_a_1(lon_dim_coef,lot)
@@ -170,7 +170,7 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
       subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
      &                        lon_dim_grid,lon_dim_coef,lons_lat,lot)
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      use machine
+      use kinddef
       implicit none
 !!
       real(kind=kind_dbl_prec)     anl_gr_a_2(lon_dim_grid,lot)
diff --git a/get_stochy_pattern.F90 b/get_stochy_pattern.F90
index 8b7ca03e..2e453f34 100644
--- a/get_stochy_pattern.F90
+++ b/get_stochy_pattern.F90
@@ -1,7 +1,7 @@
 !>@brief The module 'get_stochy_pattern_mod' contains the subroutines to retrieve the random pattern in the cubed-sphere grid
 module get_stochy_pattern_mod
 !! the stochastic physics random pattern generators
- use machine, only : kind_dbl_prec, kind_evod
+ use kinddef, only : kind_dbl_prec, kind_evod
  use stochy_resol_def, only : latg, latg2, levs, lonf, skeblevs
  use spectral_layout_mod, only : ipt_lats_node_a, lat1s_a, lats_dim_a,      &
                                  lats_node_a, lon_dim_a, len_trie_ls,       &
@@ -15,12 +15,7 @@ module get_stochy_pattern_mod
  use stochy_patterngenerator_mod, only: random_pattern, ndimspec,           &
                                         patterngenerator_advance
  use stochy_internal_state_mod, only: stochy_internal_state
- use fv_mp_mod, only : mp_reduce_sum,is_master
-#ifdef STOCHY_UNIT_TEST
-use standalone_stochy_module,   only: GFS_control_type, GFS_grid_type
-# else
-use GFS_typedefs,       only: GFS_control_type, GFS_grid_type
-#endif
+ use mpi_wrapper, only : mp_reduce_sum,is_master
  use mersenne_twister, only: random_seed
  use dezouv_stochy_mod, only: dezouv_stochy
  use dozeuv_stochy_mod, only: dozeuv_stochy
@@ -38,16 +33,15 @@ module get_stochy_pattern_mod
 !>@brief The subroutine 'get_random_pattern_fv3' converts spherical harmonics to the gaussian grid then interpolates to the cubed-sphere grid
 !>@details This subroutine is for a 2-D (lat-lon) scalar field
 subroutine get_random_pattern_fv3(rpattern,npatterns,&
-           gis_stochy,Model,Grid,nblks,maxlen,pattern_2d)
+           gis_stochy,xlat,xlon,blksz,nblks,maxlen,pattern_2d)
 !\callgraph
 
 ! generate a random pattern for stochastic physics
  implicit none
  type(random_pattern), intent(inout)  :: rpattern(npatterns)
  type(stochy_internal_state)          :: gis_stochy
- type(GFS_control_type),   intent(in) :: Model
- type(GFS_grid_type),      intent(in) :: Grid(nblks)
- integer,intent(in)::   npatterns,nblks,maxlen
+ real(kind=kind_dbl_prec), intent(in) :: xlat(:,:),xlon(:,:)
+ integer,intent(in)                   :: npatterns,blksz(:),nblks,maxlen
 
  integer i,j,l,lat,ierr,n,nn,k,nt
  real(kind=kind_dbl_prec), dimension(lonf,gis_stochy%lats_node_a,1):: wrk2d
@@ -91,10 +85,10 @@ subroutine get_random_pattern_fv3(rpattern,npatterns,&
 ! interpolate to cube grid
    allocate(rslmsk(lonf,latg))
    do blk=1,nblks
-      len=size(Grid(blk)%xlat,1)
+      len=blksz(blk)
       pattern_1d = 0
-      associate( tlats=>Grid(blk)%xlat*rad2deg,&
-                 tlons=>Grid(blk)%xlon*rad2deg )
+      associate( tlats=>xlat(blk,:)*rad2deg,&
+                 tlons=>xlon(blk,:)*rad2deg )
          call stochy_la2ga(workg,lonf,latg,gg_lons,gg_lats,wlon,rnlat,&
                            pattern_1d(1:len),len,rslmsk,tlats,tlons)
       pattern_2d(blk,:)=pattern_1d(:)
@@ -109,16 +103,15 @@ end subroutine get_random_pattern_fv3
 !>@brief The subroutine 'get_random_pattern_fv3_sfc' converts spherical harmonics to the gaussian grid then interpolates to the cubed-sphere grid once
 !>@details This subroutine is for a 2-D (lat-lon) scalar field
 subroutine get_random_pattern_sfc_fv3(rpattern,npatterns,&
-           gis_stochy,Model,Grid,nblks,maxlen,pattern_3d)
+           gis_stochy,xlat,xlon,blksz,nblks,maxlen,pattern_3d)
 !\callgraph
 
 ! generate a random pattern for stochastic physics
  implicit none
- type(random_pattern), intent(inout) :: rpattern(npatterns)
- type(stochy_internal_state), target :: gis_stochy
- type(GFS_control_type),   intent(in) :: Model
- type(GFS_grid_type),      intent(in) :: Grid(nblks)
- integer,intent(in)::   npatterns,nblks,maxlen
+ type(random_pattern), intent(inout)  :: rpattern(npatterns)
+ type(stochy_internal_state), target  :: gis_stochy
+ real(kind=kind_dbl_prec), intent(in) :: xlat(:,:),xlon(:,:)
+ integer,intent(in)                   :: npatterns,blksz(:),nblks,maxlen
 
  integer i,j,l,lat,ierr,n,nn,k,nt
  real(kind=kind_dbl_prec), dimension(lonf,gis_stochy%lats_node_a,1):: wrk2d
@@ -164,10 +157,10 @@ subroutine get_random_pattern_sfc_fv3(rpattern,npatterns,&
 
    allocate(rslmsk(lonf,latg))
    do blk=1,nblks
-      len=size(Grid(blk)%xlat,1)
+      len=blksz(blk)
       pattern_1d = 0
-      associate( tlats=>Grid(blk)%xlat*rad2deg,&
-                 tlons=>Grid(blk)%xlon*rad2deg )
+      associate( tlats=>xlat(blk,:)*rad2deg,&
+                 tlons=>xlon(blk,:)*rad2deg )
          call stochy_la2ga(workg,lonf,latg,gg_lons,gg_lats,wlon,rnlat,&
                            pattern_1d(1:len),len,rslmsk,tlats,tlons)
       pattern_3d(blk,:,k)=pattern_1d(:)
@@ -185,20 +178,21 @@ end subroutine get_random_pattern_sfc_fv3
 !>@brief The subroutine 'get_random_pattern_fv3_vect' converts spherical harmonics to a vector on gaussian grid then interpolates to the cubed-sphere grid 
 !>@details This subroutine is for a 2-D (lat-lon) vector field
 subroutine get_random_pattern_fv3_vect(rpattern,npatterns,&
-           gis_stochy,Model,Grid,nblks,maxlen,upattern_3d,vpattern_3d)
+           gis_stochy,levs,xlat,xlon,blksz,nblks,maxlen,upattern_3d,vpattern_3d)
 !\callgraph
 
 ! generate a random pattern for stochastic physics
  implicit none
- type(GFS_control_type),   intent(in) :: Model
- type(GFS_grid_type),      intent(in) :: Grid(nblks)
  type(stochy_internal_state), target :: gis_stochy
+ integer,              intent(in)    :: levs
  type(random_pattern), intent(inout) :: rpattern(npatterns)
 
  real(kind=kind_evod), dimension(len_trie_ls,2,1) ::  vrtspec_e,divspec_e
  real(kind=kind_evod), dimension(len_trio_ls,2,1) ::  vrtspec_o,divspec_o
- integer::   npatterns,nblks,blk,len,maxlen
+ real(kind=kind_dbl_prec), intent(in) :: xlat(:,:),xlon(:,:)
+ integer,intent(in)                   :: npatterns,blksz(:),nblks,maxlen
 
+ integer :: blk,len
  real(kind=kind_dbl_prec) :: upattern_3d(nblks,maxlen,levs)
  real(kind=kind_dbl_prec) :: vpattern_3d(nblks,maxlen,levs)
  real(kind=kind_dbl_prec) :: pattern_1d(maxlen)
@@ -250,10 +244,10 @@ subroutine get_random_pattern_fv3_vect(rpattern,npatterns,&
     call mp_reduce_sum(workgv,lonf,latg)
 ! interpolate to cube grid
     do blk=1,nblks
-       len=size(Grid(blk)%xlat,1)
+       len=blksz(blk)
        pattern_1d = 0
-          associate( tlats=>Grid(blk)%xlat*rad2deg,&
-                     tlons=>Grid(blk)%xlon*rad2deg )
+          associate( tlats=>xlat(blk,:)*rad2deg,&
+                     tlons=>xlon(blk,:)*rad2deg )
              call stochy_la2ga(workgu,lonf,latg,gg_lons,gg_lats,wlon,rnlat,&
                                pattern_1d(1:len),len,rslmsk,tlats,tlons)
        skebu_save(blk,:,k)=pattern_1d(:)
@@ -308,10 +302,10 @@ subroutine get_random_pattern_fv3_vect(rpattern,npatterns,&
  call mp_reduce_sum(workgv,lonf,latg)
 ! interpolate to cube grid
  do blk=1,nblks
-    len=size(Grid(blk)%xlat,1)
+    len=blksz(blk)
     pattern_1d = 0
-    associate( tlats=>Grid(blk)%xlat*rad2deg,&
-               tlons=>Grid(blk)%xlon*rad2deg )
+    associate( tlats=>xlat(blk,:)*rad2deg,&
+               tlons=>xlon(blk,:)*rad2deg )
        call stochy_la2ga(workgu,lonf,latg,gg_lons,gg_lats,wlon,rnlat,&
                          pattern_1d(1:len),len,rslmsk,tlats,tlons)
     skebu_save(blk,:,skeblevs)=pattern_1d(:)
@@ -324,7 +318,7 @@ subroutine get_random_pattern_fv3_vect(rpattern,npatterns,&
   deallocate(workgu)
   deallocate(workgv)
 ! interpolate in the vertical  ! consider moving to cubed sphere side,  more memory, but less interpolations
- do k=1,Model%levs
+ do k=1,levs
     do blk=1,nblks
        upattern_3d(blk,:,k) = skeb_vwts(k,1)*skebu_save(blk,:,skeb_vpts(k,1))+skeb_vwts(k,2)*skebu_save(blk,:,skeb_vpts(k,2))
        vpattern_3d(blk,:,k) = skeb_vwts(k,1)*skebv_save(blk,:,skeb_vpts(k,1))+skeb_vwts(k,2)*skebv_save(blk,:,skeb_vpts(k,2))
diff --git a/glats_stochy.f b/glats_stochy.f
index b4abbdde..51aad0fe 100644
--- a/glats_stochy.f
+++ b/glats_stochy.f
@@ -12,7 +12,7 @@ subroutine glats_stochy(lgghaf,colrad,wgt,wgtcs,rcs2,iprint)
 !
 ! Jan 2013   Henry Juang  increase precision by kind_qdt_prec=16
 !                         to help wgt (Gaussian weighting)
-      use machine
+      use kinddef
       implicit none
       integer                  iter,k,k1,l2,lgghaf,iprint
 !
@@ -87,7 +87,7 @@ subroutine glats_stochy(lgghaf,colrad,wgt,wgtcs,rcs2,iprint)
 !>@brief The subroutine 'poly' does something with latitudes
 !>@details This code is taken from the legacy spectral GFS
       subroutine poly(n,rad,p)
-      use machine
+      use kinddef
 !
       implicit none
 !
diff --git a/gozrineo_stochy.f b/gozrineo_stochy.f
index 30c6d66d..e972b694 100644
--- a/gozrineo_stochy.f
+++ b/gozrineo_stochy.f
@@ -13,7 +13,7 @@ subroutine gozrineo_a_stochy(plnev_a,plnod_a,
 cc
       use stochy_resol_def
       use spectral_layout_mod
-      use machine
+      use kinddef
       implicit none
 cc
       real(kind=kind_dbl_prec) plnev_a(len_trie_ls,latg2)
diff --git a/halo_exchange.fv3.F90 b/halo_exchange.fv3.F90
new file mode 100644
index 00000000..cd267e00
--- /dev/null
+++ b/halo_exchange.fv3.F90
@@ -0,0 +1,99 @@
+module halo_exchange
+
+!> This halo exchange routine is for host model fv3atm and requires FMS
+
+use mpp_mod,                only: mpp_error, FATAL
+use mpp_parameter_mod,      only: EUPDATE, WUPDATE, SUPDATE, NUPDATE
+use mpp_domains_mod,        only: domain2d, mpp_update_domains
+use mpp_domains_mod,        only: mpp_update_domains
+
+implicit none
+private
+
+!--- utility routines
+public ::  atmosphere_scalar_field_halo
+
+contains
+
+!>@brief The subroutine 'atmosphere_scalar_field_halo' is an API to return halo information 
+!! of the current MPI_rank for an input scalar field.
+!>@detail Up to three point haloes can be returned by this API which includes special handling for
+!! the cubed-sphere tile corners. Output will be in (i,j,k) while input can be in (i,j,k) or 
+!! horizontally-packed form (ix,k).
+ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p, &
+                                          isc, iec, jsc, jec, npx, npy, domain_for_coupler)
+   !--------------------------------------------------------------------
+   ! data   - output array to return the field with halo (i,j,k)
+   !          optionally input for field already in (i,j,k) form
+   !          sized to include the halo of the field (+ 2*halo)
+   ! halo   - size of the halo (must be less than 3)
+   ! ied    - horizontal resolution in i-dir with haloes
+   ! jed    - horizontal resolution in j-dir with haloes
+   ! ksize  - vertical resolution
+   ! data_p - optional input field in packed format (ix,k)  
+   !--------------------------------------------------------------------
+   !--- interface variables ---
+   real*8, dimension(1:isize,1:jsize,ksize), intent(inout) :: data !< output array to return the field with halo (i,j,k)
+                                                                   !< optionally input for field already in (i,j,k) form
+                                                                   !< sized to include the halo of the field (+ 2*halo)
+   integer, intent(in) :: halo  !< size of the halo (must be less than 3)
+   integer, intent(in) :: isize !< horizontal resolution in i-dir with haloes
+   integer, intent(in) :: jsize !< horizontal resolution in j-dir with haloes
+   integer, intent(in) :: ksize !< vertical resolution
+   real*8, dimension(:,:), optional, intent(in) :: data_p !< optional input field in packed format (ix,k)
+   integer, intent(in) :: isc, iec, jsc, jec, npx, npy
+   type(domain2d), intent(inout) :: domain_for_coupler
+   !--- local variables ---
+   integer :: i, j, k
+   integer :: ic, jc
+   character(len=44) :: modname = 'atmosphere_mod::atmosphere_scalar_field_halo'
+   integer :: mpp_flags
+
+   !--- perform error checking
+   if (halo .gt. 3) call mpp_error(FATAL, modname//' - halo.gt.3 requires extending the MPP domain to support')
+   ic = isize - 2 * halo
+   jc = jsize - 2 * halo
+
+   !--- if packed data is present, unpack it into the two-dimensional data array
+   if (present(data_p)) then
+     if (ic*jc .ne. size(data_p,1)) call mpp_error(FATAL, modname//' - incorrect sizes for incoming &
+                                                  &variables data and data_p')
+     data = 0.
+     do k = 1, ksize
+       do j = 1, jc
+         do i = 1, ic
+           data(i+halo, j+halo, k) = data_p(i + (j-1)*ic, k)
+         enddo
+       enddo
+     enddo
+   endif
+
+   mpp_flags = EUPDATE + WUPDATE + SUPDATE + NUPDATE
+   if (halo == 1) then
+     call mpp_update_domains(data, domain_for_coupler, flags=mpp_flags, complete=.true.)
+   ! Not needed for cellular automata code
+   !elseif (halo == 3) then
+   !  call mpp_update_domains(data, Atm(mytile)%domain, flags=mpp_flags, complete=.true.)
+   else
+     call mpp_error(FATAL, modname//' - unsupported halo size')
+   endif
+
+   !--- fill the halo points when at a corner of the cubed-sphere tile 
+   !--- interior domain corners are handled correctly
+   if ( (isc==1) .or. (jsc==1) .or. (iec==npx-1) .or. (jec==npy-1) ) then
+     do k = 1, ksize
+       do j=1,halo
+         do i=1,halo
+           if ((isc==    1) .and. (jsc==    1)) data(halo+1-j ,halo+1-i ,k) = data(halo+i     ,halo+1-j ,k)  !SW Corner
+           if ((isc==    1) .and. (jec==npy-1)) data(halo+1-j ,halo+jc+i,k) = data(halo+i     ,halo+jc+j,k)  !NW Corner
+           if ((iec==npx-1) .and. (jsc==    1)) data(halo+ic+j,halo+1-i ,k) = data(halo+ic-i+1,halo+1-j ,k)  !SE Corner
+           if ((iec==npx-1) .and. (jec==npy-1)) data(halo+ic+j,halo+jc+i,k) = data(halo+ic-i+1,halo+jc+j,k)  !NE Corner
+         enddo
+       enddo
+     enddo
+   endif
+
+   return
+ end subroutine atmosphere_scalar_field_halo
+
+end module halo_exchange
diff --git a/initialize_spectral_mod.F90 b/initialize_spectral_mod.F90
index bc82299b..89dd4f8f 100644
--- a/initialize_spectral_mod.F90
+++ b/initialize_spectral_mod.F90
@@ -17,7 +17,7 @@ module initialize_spectral_mod
 !
 !!uses:
 !
-      use machine
+      use kinddef
       use spectral_layout_mod,      only : ipt_lats_node_a, lats_node_a_max,lon_dim_a,len_trie_ls,len_trio_ls &
                                       ,nodes,ls_max_node,lats_dim_a,ls_dim,lat1s_a
       use stochy_layout_lag, only : lat1s_h
@@ -25,12 +25,11 @@ module initialize_spectral_mod
       use spectral_layout_mod,only:lon_dims_a, num_parthds_stochy => ompthreads
       use stochy_resol_def
       use stochy_namelist_def
-      use fv_mp_mod, only : is_master
       use stochy_gg_def, only : wgt_a,sinlat_a,coslat_a,colrad_a,wgtcs_a,rcs2_a,lats_nodes_h,global_lats_h
       use getcon_spectral_mod, only: getcon_spectral
       use get_ls_node_stochy_mod, only: get_ls_node_stochy
       use getcon_lag_stochy_mod, only: getcon_lag_stochy
-      !use mpp_mod
+      !use mpi_wrapper, only : is_master
 #ifndef IBM
       USE omp_lib
 #endif
@@ -66,8 +65,6 @@ subroutine initialize_spectral(gis_stochy, rc)
 !      print*,'before allocate lonsperlat,',&
 !                   allocated(gis_stochy%lonsperlat),'latg=',latg
 !
-!      gis_stochy%nodes=mpp_npes()
-!      print*,'mpp_npes=',mpp_npes()
       nodes  = gis_stochy%nodes
       npe_single_member = gis_stochy%npe_single_member
 
@@ -109,7 +106,6 @@ subroutine initialize_spectral(gis_stochy, rc)
 
       inquire (file="lonsperlat.dat", exist=file_exists)
       if ( .not. file_exists ) then
-        !call mpp_error(FATAL,'Requested lonsperlat.dat  data file does not exist')
          gis_stochy%lonsperlat(:)=lonf
       else
         open (iunit,file='lonsperlat.dat',status='old',form='formatted',      &
diff --git a/kinddef.F90 b/kinddef.F90
new file mode 100644
index 00000000..ef369d95
--- /dev/null
+++ b/kinddef.F90
@@ -0,0 +1,25 @@
+module kinddef
+
+      implicit none
+
+      private
+
+      public :: kind_evod, kind_phys
+      public :: kind_dbl_prec, kind_qdt_prec
+      public :: kind_io4, kind_io8
+
+      integer, parameter :: kind_io4      = 4
+
+      ! DH* TODO - stochastic physics / CA should be using only one of these
+      integer, parameter :: kind_evod     = 8
+      integer, parameter :: kind_phys     = 8
+      integer, parameter :: kind_dbl_prec = 8
+      integer, parameter :: kind_io8      = 8
+
+#ifdef __PGI
+      integer, parameter :: kind_qdt_prec = 8
+#else
+      integer, parameter :: kind_qdt_prec = 16
+#endif
+
+end module kinddef
diff --git a/makefile b/makefile
index 61a4b14a..03abd965 100644
--- a/makefile
+++ b/makefile
@@ -43,6 +43,9 @@ SRCS_f   =  \
 		./dezouv_stochy.f
 
 SRCS_F90 = \
+		./kinddef.F90                               \
+		./mpi_wrapper.F90                           \
+		./halo_exchange.fv3.F90                     \
 		./spectral_layout.F90                       \
 		./getcon_spectral.F90                       \
 		./stochy_namelist_def.F90                   \
@@ -53,9 +56,9 @@ SRCS_F90 = \
 		./stochy_data_mod.F90                       \
 		./get_stochy_pattern.F90                    \
 		./initialize_spectral_mod.F90               \
-                ./cellular_automata_global.F90              \
-                ./cellular_automata_sgs.F90                 \
-                ./update_ca.F90 
+		./cellular_automata_global.F90              \
+		./cellular_automata_sgs.F90                 \
+		./update_ca.F90 
 
 SRCS_c   =
 
diff --git a/mersenne_twister.F b/mersenne_twister.F
new file mode 100644
index 00000000..8cc6bd5e
--- /dev/null
+++ b/mersenne_twister.F
@@ -0,0 +1,500 @@
+!>\file mersenne_twister.f
+!! This file contains the module that calculates random numbers using the 
+!! Mersenne twister
+
+!> \defgroup mersenne_ge Mersenne Twister Module
+!! Module: mersenne_twister   Modern random number generator
+!!\author Iredell          Org: W/NX23     date: 2005-06-14
+!! Abstract: This module calculates random numbers using the Mersenne twister.
+!!   (It has been adapted to a Fortran 90 module from open source software.
+!!   The comments from the original software are given below in the remarks.)
+!!   The Mersenne twister (aka MT19937) is a state-of-the-art random number
+!!   generator based on Mersenne primes and originally developed in 1997 by
+!!   Matsumoto and Nishimura. It has a period before repeating of 2^19937-1,
+!!   which certainly should be good enough for geophysical purposes. :-)
+!!   Considering the algorithm's robustness, it runs fairly speedily.
+!!   (Some timing statistics are given below in the remarks.)
+!!   This adaptation uses the standard Fortran 90 random number interface,
+!!   which can generate an arbitrary number of random numbers at one time.
+!!   The random numbers generated are uniformly distributed between 0 and 1.
+!!   The module also can generate random numbers from a Gaussian distribution
+!!   with mean 0 and standard deviation 1, using a Numerical Recipes algorithm.
+!!   The module also can generate uniformly random integer indices.
+!!   There are also thread-safe versions of the generators in this adaptation,
+!!   necessitating the passing of generator states which must be kept private.
+!
+! Program History Log:
+!   2005-06-14  Mark Iredell
+!
+! Usage: 
+!   The module can be compiled with 4-byte reals or with 8-byte reals, but
+!   4-byte integers are required. The module should be endian-independent.
+!   The Fortran 90 interfaces random_seed and random_number are overloaded
+!   and can be used as in the standard by adding the appropriate use statement
+!     use mersenne_twister
+!   In the below use cases, harvest is a real array of arbitrary size,
+!   and iharvest is an integer array of arbitrary size.
+!   To generate uniformly distributed random numbers between 0 and 1,
+!     call random_number(harvest)
+!   To generate Gaussian distributed random numbers with 0 mean and 1 sigma,
+!     call random_gauss(harvest)
+!   To generate uniformly distributed random integer indices between 0 and n,
+!     call random_index(n,iharvest)
+!   In standard "saved" mode, the random number generator can be used without
+!   setting a seed. But to set a seed, only 1 non-zero integer is required, e.g.
+!     call random_setseed(4357) ! set default seed
+!   The full generator state can be set via the standard interface random_seed,
+!   but it is recommended to use this method only to restore saved states, e.g.
+!     call random_seed(size=lsave)  ! get size of generator state seed array
+!     allocate isave(lsave)         ! allocate seed array
+!     call random_seed(get=isave)   ! fill seed array (then maybe save to disk)
+!     call random_seed(put=isave)   ! restore state (after read from disk maybe)
+!   Locally kept generator states can also be saved in a seed array, e.g.
+!     type(random_stat):: stat
+!     call random_seed(get=isave,stat=stat)  ! fill seed array
+!     call random_seed(put=isave,stat=stat)  ! restore state
+!   To generate random numbers in a threaded region, the "thread-safe" mode
+!   must be used where generator states of type random_state are passed, e.g.
+!     type(random_stat):: stat(8)
+!     do i=1,8                               ! threadable loop
+!       call random_setseed(7171*i,stat(i))  ! thread-safe call
+!     enddo
+!     do i=1,8                               ! threadable loop
+!       call random_number(harvest,stat(i))  ! thread-safe call
+!     enddo
+!     do i=1,8                               ! threadable loop
+!       call random_gauss(harvest,stat(i))   ! thread-safe call
+!     enddo
+!     do i=1,8                               ! threadable loop
+!       call random_index(n,iharvest,stat(i))! thread-safe call
+!     enddo
+!   There is also a relatively inefficient "interactive" mode available, where
+!   setting seeds and generating random numbers are done in the same call.
+!   There is also a functional mode available, returning one value at a time.
+!   
+! Public Defined Types:
+!   random_stat       Generator state (private contents)
+!   
+! Public Subprograms:
+!   random_seed         determine size or put or get state
+!     size              optional integer output size of seed array
+!     put               optional integer(:) input seed array
+!     get               optional integer(:) output seed array
+!     stat              optional type(random_stat) (thread-safe mode)
+!   random_setseed      set seed (thread-safe mode)
+!     inseed            integer seed input
+!     stat              type(random_stat) output
+!   random_setseed      set seed (saved mode)
+!     inseed            integer seed input
+!   random_number       get mersenne twister random numbers (thread-safe mode)
+!     harvest           real(:) numbers output
+!     stat              type(random_stat) input
+!   random_number       get mersenne twister random numbers (saved mode)
+!     harvest           real(:) numbers output
+!   random_number       get mersenne twister random numbers (interactive mode)
+!     harvest           real(:) numbers output
+!     inseed            integer seed input
+!   random_number_f     get mersenne twister random number (functional mode)
+!     harvest           real number output
+!   random_gauss        get gaussian random numbers (thread-safe mode)
+!     harvest           real(:) numbers output
+!     stat              type(random_stat) input
+!   random_gauss        get gaussian random numbers (saved mode)
+!     harvest           real(:) numbers output
+!   random_gauss        get gaussian random numbers (interactive mode)
+!     harvest           real(:) numbers output
+!     inseed            integer seed input
+!   random_gauss_f      get gaussian random number (functional mode)
+!     harvest           real number output
+!   random_index        get random indices (thread-safe mode)
+!     imax              integer maximum index input
+!     iharvest          integer(:) numbers output
+!     stat              type(random_stat) input
+!   random_index        get random indices (saved mode)
+!     imax              integer maximum index input
+!     iharvest          integer(:) numbers output
+!   random_index        get random indices (interactive mode)
+!     imax              integer maximum index input
+!     iharvest          integer(:) numbers output
+!     inseed            integer seed input
+!   random_index_f      get random index (functional mode)
+!     imax              integer maximum index input
+!     iharvest          integer number output
+!
+! Remarks:
+!   (1) Here are the comments in the original open source code:
+!     A C-program for MT19937: Real number version
+!     genrand() generates one pseudorandom real number (double)
+!     which is uniformly distributed on [0,1]-interval, for each
+!     call. sgenrand(seed) set initial values to the working area
+!     of 624 words. Before genrand(), sgenrand(seed) must be
+!     called once. (seed is any 32-bit integer except for 0).
+!     Integer generator is obtained by modifying two lines.
+!     Coded by Takuji Nishimura, considering the suggestions by
+!     Topher Cooper and Marc Rieffel in July-Aug. 1997.
+!     This library is free software; you can redistribute it and/or
+!     modify it under the terms of the GNU Library General Public
+!     License as published by the Free Software Foundation; either
+!     version 2 of the License, or (at your option) any later
+!     version.
+!     This library is distributed in the hope that it will be useful,
+!     but WITHOUT ANY WARRANTY; without even the implied warranty of
+!     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!     See the GNU Library General Public License for more details.
+!     You should have received a copy of the GNU Library General
+!     Public License along with this library; if not, write to the
+!     Free Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+!     02111-1307  USA
+!     Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura.
+!     When you use this, send an email to: matumoto@math.keio.ac.jp
+!     with an appropriate reference to your work.
+!     Fortran translation by Hiroshi Takano.  Jan. 13, 1999.
+!
+!   (2) On a single IBM Power4 processor on the NCEP operational cluster (2005)
+!     each Mersenne twister random number takes less than 30 ns, about 3 times
+!     slower than the default random number generator, and each random number
+!     from a Gaussian distribution takes less than 150 ns.
+!     
+! Attributes:
+!   Language: Fortran 90
+!
+!$$$
+      module mersenne_twister
+        private
+!  Public declarations
+        public random_stat
+        public random_seed
+        public random_setseed
+        public random_number
+        public random_number_f
+        public random_gauss
+        public random_gauss_f
+        public random_index
+        public random_index_f
+!  Parameters
+        integer,parameter:: n=624
+        integer,parameter:: m=397
+        integer,parameter:: mata=-1727483681     !< constant vector a
+        integer,parameter:: umask=-2147483648    !< most significant w-r bits
+        integer,parameter:: lmask =2147483647    !< least significant r bits
+        integer,parameter:: tmaskb=-1658038656   !< tempering parameter
+        integer,parameter:: tmaskc=-272236544    !< tempering parameter
+        integer,parameter:: mag01(0:1)=(/0,mata/)
+        integer,parameter:: iseed=4357
+        integer,parameter:: nrest=n+6
+!  Defined types
+        type random_stat                         !< Generator state 
+          private
+          integer:: mti=n+1
+          integer:: mt(0:n-1)
+          integer:: iset
+          real:: gset
+        end type
+!  Saved data
+        type(random_stat),save:: sstat
+!  Overloaded interfaces
+        interface random_setseed
+          module procedure random_setseed_s
+          module procedure random_setseed_t
+        end interface
+        interface random_number
+          module procedure random_number_i
+          module procedure random_number_s
+          module procedure random_number_t
+        end interface
+        interface random_gauss
+          module procedure random_gauss_i
+          module procedure random_gauss_s
+          module procedure random_gauss_t
+        end interface
+        interface random_index
+          module procedure random_index_i
+          module procedure random_index_s
+          module procedure random_index_t
+        end interface
+!  All the subprograms
+      contains
+!  Subprogram random_seed
+!> This subroutine sets and gets state; overloads Fortran 90 standard.
+        subroutine random_seed(size,put,get,stat)
+          implicit none
+          integer,intent(out),optional:: size
+          integer,intent(in),optional:: put(nrest)
+          integer,intent(out),optional:: get(nrest)
+          type(random_stat),intent(inout),optional:: stat
+          if(present(size)) then     ! return size of seed array
+!     if(present(put).or.present(get))&
+!      call errmsg('RANDOM_SEED: more than one option set - some ignored')
+            size=nrest
+          elseif(present(put)) then  ! restore from seed array
+!     if(present(get))&
+!      call errmsg('RANDOM_SEED: more than one option set - some ignored')
+            if(present(stat)) then
+              stat%mti=put(1)
+              stat%mt=put(2:n+1)
+              stat%iset=put(n+2)
+              stat%gset=transfer(put(n+3:nrest),stat%gset)
+              if(stat%mti.lt.0.or.stat%mti.gt.n.or.any(stat%mt.eq.0).or.
+     &         stat%iset.lt.0.or.stat%iset.gt.1) then
+                call random_setseed_t(iseed,stat)
+!         call errmsg('RANDOM_SEED: invalid seeds put - default seeds used')
+              endif
+            else
+              sstat%mti=put(1)
+              sstat%mt=put(2:n+1)
+              sstat%iset=put(n+2)
+              sstat%gset=transfer(put(n+3:nrest),sstat%gset)
+              if(sstat%mti.lt.0.or.sstat%mti.gt.n.or.any(sstat%mt.eq.0)
+     &         .or.sstat%iset.lt.0.or.sstat%iset.gt.1) then
+                call random_setseed_t(iseed,sstat)
+!         call errmsg('RANDOM_SEED: invalid seeds put - default seeds used')
+              endif
+            endif
+          elseif(present(get)) then  ! save to seed array
+            if(present(stat)) then
+              if(stat%mti.eq.n+1) call random_setseed_t(iseed,stat)
+              get(1)=stat%mti
+              get(2:n+1)=stat%mt
+              get(n+2)=stat%iset
+              get(n+3:nrest)=transfer(stat%gset,get,nrest-(n+3)+1)
+            else
+              if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+              get(1)=sstat%mti
+              get(2:n+1)=sstat%mt
+              get(n+2)=sstat%iset
+              get(n+3:nrest)=transfer(sstat%gset,get,nrest-(n+3)+1)
+            endif
+          else                       ! reset default seed
+            if(present(stat)) then
+              call random_setseed_t(iseed,stat)
+            else
+              call random_setseed_t(iseed,sstat)
+            endif
+          endif
+        end subroutine
+!  Subprogram random_setseed_s
+!> This subroutine sets seed in saved mode.
+        subroutine random_setseed_s(inseed)
+          implicit none
+          integer,intent(in):: inseed
+          call random_setseed_t(inseed,sstat)
+        end subroutine
+!  Subprogram random_setseed_t
+!> This subroutine sets seed in thread-safe mode.
+        subroutine random_setseed_t(inseed,stat)
+          implicit none
+          integer,intent(in):: inseed
+          type(random_stat),intent(out):: stat
+          integer ii,mti
+          ii=inseed
+          if(ii.eq.0) ii=iseed
+          stat%mti=n
+          stat%mt(0)=iand(ii,-1)
+          do mti=1,n-1
+            stat%mt(mti)=iand(69069*stat%mt(mti-1),-1)
+          enddo
+          stat%iset=0
+          stat%gset=0.
+        end subroutine
+!  Subprogram random_number_f
+!> This function generates random numbers in functional mode.
+        function random_number_f() result(harvest)
+          implicit none
+          real:: harvest
+          real h(1)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_number_t(h,sstat)
+          harvest=h(1)
+        end function
+!  Subprogram random_number_i
+!> This subroutine generates random numbers in interactive mode.
+        subroutine random_number_i(harvest,inseed)
+          implicit none
+          real,intent(out):: harvest(:)
+          integer,intent(in):: inseed
+          type(random_stat) stat
+          call random_setseed_t(inseed,stat)
+          call random_number_t(harvest,stat)
+        end subroutine
+!  Subprogram random_number_s
+!> This subroutine generates random numbers in saved mode; overloads Fortran 90 standard.
+        subroutine random_number_s(harvest)
+          implicit none
+          real,intent(out):: harvest(:)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_number_t(harvest,sstat)
+        end subroutine
+!  Subprogram random_number_t
+!> This subroutine generates random numbers in thread-safe mode.
+        subroutine random_number_t(harvest,stat)
+          implicit none
+          real,intent(out):: harvest(:)
+          type(random_stat),intent(inout):: stat
+          integer j,kk,y
+          integer tshftu,tshfts,tshftt,tshftl
+          tshftu(y)=ishft(y,-11)
+          tshfts(y)=ishft(y,7)
+          tshftt(y)=ishft(y,15)
+          tshftl(y)=ishft(y,-18)
+          do j=1,size(harvest)
+            if(stat%mti.ge.n) then
+              do kk=0,n-m-1
+                y=ior(iand(stat%mt(kk),umask),iand(stat%mt(kk+1),lmask))
+                stat%mt(kk)=ieor(ieor(stat%mt(kk+m),ishft(y,-1)),
+     &   mag01(iand(y,1)))
+              enddo
+              do kk=n-m,n-2
+                y=ior(iand(stat%mt(kk),umask),iand(stat%mt(kk+1),lmask))
+                stat%mt(kk)=ieor(ieor(stat%mt(kk+(m-n)),ishft(y,-1)),
+     &   mag01(iand(y,1)))
+              enddo
+              y=ior(iand(stat%mt(n-1),umask),iand(stat%mt(0),lmask))
+              stat%mt(n-1)=ieor(ieor(stat%mt(m-1),ishft(y,-1)),
+     &   mag01(iand(y,1)))
+              stat%mti=0
+            endif
+            y=stat%mt(stat%mti)
+            y=ieor(y,tshftu(y))
+            y=ieor(y,iand(tshfts(y),tmaskb))
+            y=ieor(y,iand(tshftt(y),tmaskc))
+            y=ieor(y,tshftl(y))
+            if(y.lt.0) then
+              harvest(j)=(real(y)+2.0**32)/(2.0**32-1.0)
+            else
+              harvest(j)=real(y)/(2.0**32-1.0)
+            endif
+            stat%mti=stat%mti+1
+          enddo
+        end subroutine
+!  Subprogram random_gauss_f
+!> This subrouitne generates Gaussian random numbers in functional mode.
+        function random_gauss_f() result(harvest)
+          implicit none
+          real:: harvest
+          real h(1)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_gauss_t(h,sstat)
+          harvest=h(1)
+        end function
+!  Subprogram random_gauss_i
+!> This subrouitne generates Gaussian random numbers in interactive mode.
+        subroutine random_gauss_i(harvest,inseed)
+          implicit none
+          real,intent(out):: harvest(:)
+          integer,intent(in):: inseed
+          type(random_stat) stat
+          call random_setseed_t(inseed,stat)
+          call random_gauss_t(harvest,stat)
+        end subroutine
+!  Subprogram random_gauss_s
+!> This subroutine generates Gaussian random numbers in saved mode.
+        subroutine random_gauss_s(harvest)
+          implicit none
+          real,intent(out):: harvest(:)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_gauss_t(harvest,sstat)
+        end subroutine
+!  Subprogram random_gauss_t
+!> This subroutine generates Gaussian random numbers in thread-safe mode.
+        subroutine random_gauss_t(harvest,stat)
+          implicit none
+          real,intent(out):: harvest(:)
+          type(random_stat),intent(inout):: stat
+          integer mx,my,mz,j
+          real r2(2),r,g1,g2
+          mz=size(harvest)
+          if(mz.le.0) return
+          mx=0
+          if(stat%iset.eq.1) then
+            mx=1
+            harvest(1)=stat%gset
+            stat%iset=0
+          endif
+          my=(mz-mx)/2*2+mx
+          do
+            call random_number_t(harvest(mx+1:my),stat)
+            do j=mx,my-2,2
+              call rgauss(harvest(j+1),harvest(j+2),r,g1,g2)
+              if(r.lt.1.) then
+                harvest(mx+1)=g1
+                harvest(mx+2)=g2
+                mx=mx+2
+              endif
+            enddo
+            if(mx.eq.my) exit
+          enddo
+          if(my.lt.mz) then
+            do
+              call random_number_t(r2,stat)
+              call rgauss(r2(1),r2(2),r,g1,g2)
+              if(r.lt.1.) exit
+            enddo
+            harvest(mz)=g1
+            stat%gset=g2
+            stat%iset=1
+          endif
+        contains
+!> This subroutine contains numerical Recipes algorithm to generate Gaussian random numbers.
+          subroutine rgauss(r1,r2,r,g1,g2)
+            real,intent(in):: r1,r2
+            real,intent(out):: r,g1,g2
+            real v1,v2,fac
+            v1=2.*r1-1.
+            v2=2.*r2-1.
+            r=v1**2+v2**2
+            if(r.lt.1.) then
+              fac=sqrt(-2.*log(r)/r)
+              g1=v1*fac
+              g2=v2*fac
+            endif
+          end subroutine
+        end subroutine
+!  Subprogram random_index_f
+!> This subroutine generates random indices in functional mode.
+        function random_index_f(imax) result(iharvest)
+          implicit none
+          integer,intent(in):: imax
+          integer:: iharvest
+          integer ih(1)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_index_t(imax,ih,sstat)
+          iharvest=ih(1)
+        end function
+!  Subprogram random_index_i
+!> This subroutine generates random indices in interactive mode.
+        subroutine random_index_i(imax,iharvest,inseed)
+          implicit none
+          integer,intent(in):: imax
+          integer,intent(out):: iharvest(:)
+          integer,intent(in):: inseed
+          type(random_stat) stat
+          call random_setseed_t(inseed,stat)
+          call random_index_t(imax,iharvest,stat)
+        end subroutine
+!  Subprogram random_index_s
+!> This subroutine generates random indices in saved mode.
+        subroutine random_index_s(imax,iharvest)
+          implicit none
+          integer,intent(in):: imax
+          integer,intent(out):: iharvest(:)
+          if(sstat%mti.eq.n+1) call random_setseed_t(iseed,sstat)
+          call random_index_t(imax,iharvest,sstat)
+        end subroutine
+!  Subprogram random_index_t
+!> This subroutine  generates random indices in thread-safe mode.
+        subroutine random_index_t(imax,iharvest,stat)
+          implicit none
+          integer,intent(in):: imax
+          integer,intent(out):: iharvest(:)
+          type(random_stat),intent(inout):: stat
+          integer,parameter:: mh=n
+          integer i1,i2,mz
+          real h(mh)
+          mz=size(iharvest)
+          do i1=1,mz,mh
+            i2=min((i1-1)+mh,mz)
+            call random_number_t(h(:i2-(i1-1)),stat)
+            iharvest(i1:i2)=max(ceiling(h(:i2-(i1-1))*imax),1)
+          enddo
+        end subroutine
+      end module
diff --git a/mpi_wrapper.F90 b/mpi_wrapper.F90
new file mode 100644
index 00000000..cde23777
--- /dev/null
+++ b/mpi_wrapper.F90
@@ -0,0 +1,667 @@
+module mpi_wrapper
+
+   implicit none
+
+   private
+
+   public :: mype, npes, root, comm, is_master
+   public :: mpi_wrapper_initialize, mpi_wrapper_finalize
+   public :: mp_reduce_min, mp_reduce_max, mp_reduce_sum
+   public :: mp_bcst, mp_alltoall
+
+#include "mpif.h"
+
+   integer, save :: mype = -999
+   integer, save :: npes = -999
+   integer, save :: root = -999
+   integer, save :: comm = -999
+   logical, save :: initialized = .false.
+
+   integer :: ierror
+
+   INTERFACE mp_bcst
+     MODULE PROCEDURE mp_bcst_i
+     MODULE PROCEDURE mp_bcst_r4
+     MODULE PROCEDURE mp_bcst_r8
+     MODULE PROCEDURE mp_bcst_1d_r4
+     MODULE PROCEDURE mp_bcst_1d_r8
+     MODULE PROCEDURE mp_bcst_2d_r4
+     MODULE PROCEDURE mp_bcst_2d_r8
+     MODULE PROCEDURE mp_bcst_3d_r4
+     MODULE PROCEDURE mp_bcst_3d_r8
+     MODULE PROCEDURE mp_bcst_4d_r4
+     MODULE PROCEDURE mp_bcst_4d_r8
+     MODULE PROCEDURE mp_bcst_1d_i
+     MODULE PROCEDURE mp_bcst_2d_i
+     MODULE PROCEDURE mp_bcst_3d_i
+     MODULE PROCEDURE mp_bcst_4d_i
+   END INTERFACE
+
+   INTERFACE mp_reduce_min
+     MODULE PROCEDURE mp_reduce_min_r4
+     MODULE PROCEDURE mp_reduce_min_r8
+   END INTERFACE
+
+   INTERFACE mp_reduce_max
+     MODULE PROCEDURE mp_reduce_max_r4_1d
+     MODULE PROCEDURE mp_reduce_max_r4
+     MODULE PROCEDURE mp_reduce_max_r8_1d
+     MODULE PROCEDURE mp_reduce_max_r8
+     MODULE PROCEDURE mp_reduce_max_i
+   END INTERFACE
+
+   INTERFACE mp_reduce_sum
+     MODULE PROCEDURE mp_reduce_sum_r4
+     MODULE PROCEDURE mp_reduce_sum_r4_1d
+     MODULE PROCEDURE mp_reduce_sum_r4_1darr
+     MODULE PROCEDURE mp_reduce_sum_r4_2darr
+     MODULE PROCEDURE mp_reduce_sum_r8
+     MODULE PROCEDURE mp_reduce_sum_r8_1d
+     MODULE PROCEDURE mp_reduce_sum_r8_1darr
+     MODULE PROCEDURE mp_reduce_sum_r8_2darr
+   END INTERFACE
+
+   INTERFACE mp_alltoall
+     MODULE PROCEDURE mp_alltoall_r4_1darr
+   END INTERFACE
+
+contains
+
+   logical function is_master()
+      if (mype==root) then
+         is_master = .true.
+      else
+         is_master = .false.
+      end if
+   end function is_master
+
+   subroutine mpi_wrapper_initialize(mpiroot, mpicomm)
+      integer, intent(in) :: mpiroot, mpicomm
+      if (initialized) return
+      root = mpiroot
+      comm = mpicomm
+      call MPI_COMM_RANK(comm, mype, ierror)
+      call MPI_COMM_SIZE(comm, npes, ierror)
+      initialized = .true.
+   end subroutine mpi_wrapper_initialize
+
+   subroutine mpi_wrapper_finalize()
+      if (.not.initialized) return
+      mype = -999
+      npes = -999
+      root = -999
+      comm = -999
+      initialized = .false.
+   end subroutine mpi_wrapper_finalize
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_i :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_i(q)
+         integer, intent(INOUT)  :: q
+
+         call MPI_BCAST(q, 1, MPI_INTEGER, root, comm, ierror)
+
+      end subroutine mp_bcst_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_r4 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_r4(q)
+         real(kind=4), intent(INOUT)  :: q
+
+         call MPI_BCAST(q, 1, MPI_REAL, root, comm, ierror)
+
+      end subroutine mp_bcst_r4
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_r8 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_r8(q)
+         real(kind=8), intent(INOUT)  :: q
+
+         call MPI_BCAST(q, 1, MPI_DOUBLE_PRECISION, root, comm, ierror)
+
+      end subroutine mp_bcst_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_1d_r4 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_1d_r4(q, idim)
+         integer, intent(IN)  :: idim
+         real(kind=4), intent(INOUT)  :: q(idim)
+
+         call MPI_BCAST(q, idim, MPI_REAL, root, comm, ierror)
+
+      end subroutine mp_bcst_1d_r4
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_1d_r8 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_1d_r8(q, idim)
+         integer, intent(IN)  :: idim
+         real(kind=8), intent(INOUT)  :: q(idim)
+
+         call MPI_BCAST(q, idim, MPI_DOUBLE_PRECISION, root, comm, ierror)
+
+      end subroutine mp_bcst_1d_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_2d_r4 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_2d_r4(q, idim, jdim)
+         integer, intent(IN)  :: idim, jdim
+         real(kind=4), intent(INOUT)  :: q(idim,jdim)
+
+         call MPI_BCAST(q, idim*jdim, MPI_REAL, root, comm, ierror)
+
+      end subroutine mp_bcst_2d_r4
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_2d_r8 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_2d_r8(q, idim, jdim)
+         integer, intent(IN)  :: idim, jdim
+         real(kind=8), intent(INOUT)  :: q(idim,jdim)
+
+         call MPI_BCAST(q, idim*jdim, MPI_DOUBLE_PRECISION, root, comm, ierror)
+
+      end subroutine mp_bcst_2d_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_3d_r4 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_3d_r4(q, idim, jdim, kdim)
+         integer, intent(IN)  :: idim, jdim, kdim
+         real(kind=4), intent(INOUT)  :: q(idim,jdim,kdim)
+
+         call MPI_BCAST(q, idim*jdim*kdim, MPI_REAL, root, comm, ierror)
+
+      end subroutine mp_bcst_3d_r4
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_3d_r8 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_3d_r8(q, idim, jdim, kdim)
+         integer, intent(IN)  :: idim, jdim, kdim
+         real(kind=8), intent(INOUT)  :: q(idim,jdim,kdim)
+
+         call MPI_BCAST(q, idim*jdim*kdim, MPI_DOUBLE_PRECISION, root, comm, ierror)
+
+      end subroutine mp_bcst_3d_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!       
+!     mp_bcst_4d_r4 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_4d_r4(q, idim, jdim, kdim, ldim)
+         integer, intent(IN)  :: idim, jdim, kdim, ldim
+         real(kind=4), intent(INOUT)  :: q(idim,jdim,kdim,ldim)
+
+         call MPI_BCAST(q, idim*jdim*kdim*ldim, MPI_REAL, root, comm, ierror)
+     
+      end subroutine mp_bcst_4d_r4
+!     
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!       
+!     mp_bcst_4d_r8 :: Call SPMD broadcast 
+!
+      subroutine mp_bcst_4d_r8(q, idim, jdim, kdim, ldim)
+         integer, intent(IN)  :: idim, jdim, kdim, ldim
+         real(kind=8), intent(INOUT)  :: q(idim,jdim,kdim,ldim)
+
+         call MPI_BCAST(q, idim*jdim*kdim*ldim, MPI_DOUBLE_PRECISION, root, comm, ierror)
+     
+      end subroutine mp_bcst_4d_r8
+!     
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_3d_i :: Call SPMD broadcast
+!
+      subroutine mp_bcst_3d_i(q, idim, jdim, kdim)
+         integer, intent(IN)  :: idim, jdim, kdim
+         integer, intent(INOUT)  :: q(idim,jdim,kdim)
+
+         call MPI_BCAST(q, idim*jdim*kdim, MPI_INTEGER, root, comm, ierror)
+
+      end subroutine mp_bcst_3d_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_1d_i :: Call SPMD broadcast
+!
+      subroutine mp_bcst_1d_i(q, idim)
+         integer, intent(IN)  :: idim
+         integer, intent(INOUT)  :: q(idim)
+
+         call MPI_BCAST(q, idim, MPI_INTEGER, root, comm, ierror)
+
+      end subroutine mp_bcst_1d_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_2d_i :: Call SPMD broadcast
+!
+      subroutine mp_bcst_2d_i(q, idim, jdim)
+         integer, intent(IN)  :: idim, jdim
+         integer, intent(INOUT)  :: q(idim,jdim)
+
+         call MPI_BCAST(q, idim*jdim, MPI_INTEGER, root, comm, ierror)
+
+      end subroutine mp_bcst_2d_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_4d_i :: Call SPMD broadcast
+!
+      subroutine mp_bcst_4d_i(q, idim, jdim, kdim, ldim)
+         integer, intent(IN)  :: idim, jdim, kdim, ldim
+         integer, intent(INOUT)  :: q(idim,jdim,kdim,ldim)
+
+         call MPI_BCAST(q, idim*jdim*kdim*ldim, MPI_INTEGER, root, comm, ierror)
+
+      end subroutine mp_bcst_4d_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!       
+!     mp_reduce_max_r4_1d :: Call SPMD REDUCE_MAX 
+!
+      subroutine mp_reduce_max_r4_1d(mymax,npts)
+         integer, intent(IN)  :: npts
+         real(kind=4), intent(INOUT)  :: mymax(npts)
+        
+         real(kind=4) :: gmax(npts)
+        
+         call MPI_ALLREDUCE( mymax, gmax, npts, MPI_REAL, MPI_MAX, &
+                             comm, ierror )
+      
+         mymax = gmax
+        
+      end subroutine mp_reduce_max_r4_1d
+!     
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!       
+!     mp_reduce_max_r8_1d :: Call SPMD REDUCE_MAX 
+!
+      subroutine mp_reduce_max_r8_1d(mymax,npts)
+         integer, intent(IN)  :: npts
+         real(kind=8), intent(INOUT)  :: mymax(npts)
+        
+         real(kind=8) :: gmax(npts)
+        
+         call MPI_ALLREDUCE( mymax, gmax, npts, MPI_DOUBLE_PRECISION, MPI_MAX, &
+                             comm, ierror )
+      
+         mymax = gmax
+        
+      end subroutine mp_reduce_max_r8_1d
+!     
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_max_r4 :: Call SPMD REDUCE_MAX 
+!
+      subroutine mp_reduce_max_r4(mymax)
+         real(kind=4), intent(INOUT)  :: mymax
+
+         real(kind=4) :: gmax
+
+         call MPI_ALLREDUCE( mymax, gmax, 1, MPI_REAL, MPI_MAX, &
+                             comm, ierror )
+
+         mymax = gmax
+
+      end subroutine mp_reduce_max_r4
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_max_r8 :: Call SPMD REDUCE_MAX 
+!
+      subroutine mp_reduce_max_r8(mymax)
+         real(kind=8), intent(INOUT)  :: mymax
+
+         real(kind=8) :: gmax
+
+         call MPI_ALLREDUCE( mymax, gmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
+                             comm, ierror )
+
+         mymax = gmax
+
+      end subroutine mp_reduce_max_r8
+
+      subroutine mp_reduce_min_r4(mymin)
+         real(kind=4), intent(INOUT)  :: mymin
+
+         real(kind=4) :: gmin
+
+         call MPI_ALLREDUCE( mymin, gmin, 1, MPI_REAL, MPI_MIN, &
+                             comm, ierror )
+
+         mymin = gmin
+
+      end subroutine mp_reduce_min_r4
+
+      subroutine mp_reduce_min_r8(mymin)
+         real(kind=8), intent(INOUT)  :: mymin
+
+         real(kind=8) :: gmin
+
+         call MPI_ALLREDUCE( mymin, gmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, &
+                             comm, ierror )
+
+         mymin = gmin
+
+      end subroutine mp_reduce_min_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_bcst_4d_i :: Call SPMD REDUCE_MAX 
+!
+      subroutine mp_reduce_max_i(mymax)
+         integer, intent(INOUT)  :: mymax
+
+         integer :: gmax
+
+         call MPI_ALLREDUCE( mymax, gmax, 1, MPI_INTEGER, MPI_MAX, &
+                             comm, ierror )
+
+         mymax = gmax
+
+      end subroutine mp_reduce_max_i
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_sum_r4 :: Call SPMD REDUCE_SUM 
+!
+      subroutine mp_reduce_sum_r4(mysum)
+         real(kind=4), intent(INOUT)  :: mysum
+
+         real(kind=4) :: gsum
+
+         call MPI_ALLREDUCE( mysum, gsum, 1, MPI_REAL, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r4
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_sum_r8 :: Call SPMD REDUCE_SUM 
+!
+      subroutine mp_reduce_sum_r8(mysum)
+         real(kind=8), intent(INOUT)  :: mysum
+
+         real(kind=8) :: gsum
+
+         call MPI_ALLREDUCE( mysum, gsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r8
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+! !
+!
+!     mp_reduce_sum_r4_1darr :: Call SPMD REDUCE_SUM
+!
+      subroutine mp_reduce_sum_r4_1darr(mysum, npts)
+         integer, intent(in)  :: npts
+         real(kind=4), intent(inout)  :: mysum(npts)
+         real(kind=4)                 :: gsum(npts)
+
+         gsum = 0.0
+         call MPI_ALLREDUCE( mysum, gsum, npts, MPI_REAL, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r4_1darr
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+! !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+! !
+!
+!     mp_reduce_sum_r4_2darr :: Call SPMD REDUCE_SUM
+!
+      subroutine mp_reduce_sum_r4_2darr(mysum, npts1,npts2)
+         integer, intent(in)  :: npts1,npts2
+         real(kind=4), intent(inout)  :: mysum(npts1,npts2)
+         real(kind=4)                 :: gsum(npts1,npts2)
+
+         gsum = 0.0
+         call MPI_ALLREDUCE( mysum, gsum, npts1*npts2, MPI_REAL, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r4_2darr
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+! !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_sum_r4_1d :: Call SPMD REDUCE_SUM 
+!
+      subroutine mp_reduce_sum_r4_1d(mysum, sum1d, npts)
+         integer, intent(in)  :: npts
+         real(kind=4), intent(in)     :: sum1d(npts)
+         real(kind=4), intent(INOUT)  :: mysum
+
+         real(kind=4) :: gsum
+         integer :: i
+
+         mysum = 0.0
+         do i=1,npts
+            mysum = mysum + sum1d(i)
+         enddo 
+
+         call MPI_ALLREDUCE( mysum, gsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r4_1d
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv !
+!
+!     mp_reduce_sum_r8_1d :: Call SPMD REDUCE_SUM 
+!
+      subroutine mp_reduce_sum_r8_1d(mysum, sum1d, npts)
+         integer, intent(in)  :: npts
+         real(kind=8), intent(in)     :: sum1d(npts)
+         real(kind=8), intent(INOUT)  :: mysum
+
+         real(kind=8) :: gsum
+         integer :: i
+
+         mysum = 0.0
+         do i=1,npts
+            mysum = mysum + sum1d(i)
+         enddo 
+
+         call MPI_ALLREDUCE( mysum, gsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r8_1d
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ !
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+! !
+!
+!     mp_reduce_sum_r8_1darr :: Call SPMD REDUCE_SUM
+!
+      subroutine mp_reduce_sum_r8_1darr(mysum, npts)
+         integer, intent(in)  :: npts
+         real(kind=8), intent(inout)  :: mysum(npts)
+         real(kind=8)                 :: gsum(npts)
+
+         gsum = 0.0
+
+         call MPI_ALLREDUCE( mysum, gsum, npts, MPI_DOUBLE_PRECISION, &
+                             MPI_SUM,                                 &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r8_1darr
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+! !
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+! !
+!
+!     mp_reduce_sum_r8_2darr :: Call SPMD REDUCE_SUM
+!
+      subroutine mp_reduce_sum_r8_2darr(mysum, npts1,npts2)
+         integer, intent(in)  :: npts1,npts2
+         real(kind=8), intent(inout)  :: mysum(npts1,npts2)
+         real(kind=8)                 :: gsum(npts1,npts2)
+
+         gsum = 0.0
+
+         call MPI_ALLREDUCE( mysum, gsum, npts1*npts2,      &
+                             MPI_DOUBLE_PRECISION, MPI_SUM, &
+                             comm, ierror )
+
+         mysum = gsum
+
+      end subroutine mp_reduce_sum_r8_2darr
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+! !
+
+!-------------------------------------------------------------------------------
+! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+! !
+!
+!     mp_reduce_sum_r8_2darr :: Call SPMD REDUCE_SUM
+!
+      subroutine mp_alltoall_r4_1darr(sbuf, ssize, sdispl, rbuf, rsize, rdispl)
+         real(kind=4), intent(in)    :: sbuf(:)
+         real(kind=4), intent(inout) :: rbuf(:)
+         integer, intent(in) :: ssize(:),  rsize(:)
+         integer, intent(in) :: sdispl(:), rdispl(:)
+
+         call MPI_ALLTOALLV( sbuf, ssize, sdispl, MPI_REAL, &
+                             rbuf, rsize, rdispl, MPI_REAL, &
+                             comm, ierror )
+
+      end subroutine mp_alltoall_r4_1darr
+!
+! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+! !
+
+end module mpi_wrapper
diff --git a/pln2eo_stochy.f b/pln2eo_stochy.f
index c66a2a4d..1ff4d262 100644
--- a/pln2eo_stochy.f
+++ b/pln2eo_stochy.f
@@ -15,7 +15,7 @@ subroutine pln2eo_a_stochy(plnev_a,plnod_a,epse,epso,colrad_a,
 !
       use stochy_resol_def
       use spectral_layout_mod
-      use machine
+      use kinddef
       implicit none
 !
 ! define x number constant for real8 start
diff --git a/spectral_layout.F90 b/spectral_layout.F90
index d55ca2b2..291f528c 100644
--- a/spectral_layout.F90
+++ b/spectral_layout.F90
@@ -12,6 +12,7 @@ module spectral_layout_mod
 !
    integer :: nodes,             &
               me,                &
+              master,            &
               lon_dim_a,         &
               ls_dim,            &
               ls_max_node,       &
@@ -34,7 +35,14 @@ module spectral_layout_mod
 
 contains
 
-!
+   logical function is_master()
+      if (me == master) then
+         is_master = .true.
+      else
+         is_master = .false.
+      end if
+   end function is_master
+
    !  interpolation from lat/lon or gaussian grid to other lat/lon grid
    !
 !>@brief The subroutine 'stochy_la2ga' intepolates from the global gaussian grid
@@ -42,7 +50,7 @@ module spectral_layout_mod
 !>@details This code is taken from the legacy spectral GFS
    subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
                            gauout,len,rslmsk, outlat, outlon)
-      use machine , only : kind_io8, kind_io4
+      use kinddef , only : kind_io8, kind_io4
       implicit none
       ! interface variables
       real (kind=kind_io8), intent(in)  :: regin(imxin,jmxin)
diff --git a/standalone_ca.F90 b/standalone_ca.F90
index 8c84b7b4..12a45092 100644
--- a/standalone_ca.F90
+++ b/standalone_ca.F90
@@ -2,10 +2,7 @@ program  standalone_stochy_new
 
 use standalone_stochy_module
 
-use fv_mp_mod,           only: mp_start, domain_decomp
 use atmosphere_stub_mod, only: Atm,atmosphere_init_stub
-use fv_arrays_mod,       only: fv_atmos_type
-use fv_control_mod,      only: setup_pointers
 !use mpp_domains
 use mpp_mod,             only: mpp_set_current_pelist,mpp_get_current_pelist,mpp_init,mpp_pe,mpp_npes ,mpp_declare_pelist
 use mpp_domains_mod,     only: mpp_broadcast_domain,MPP_DOMAIN_TIME,mpp_domains_init ,mpp_domains_set_stack_size
diff --git a/standalone_stochy.F90 b/standalone_stochy.F90
index cb37c121..afb8ca12 100644
--- a/standalone_stochy.F90
+++ b/standalone_stochy.F90
@@ -1,12 +1,9 @@
 program  standalone_stochy
 
 use standalone_stochy_module
-use stochastic_physics, only : init_stochastic_physics,run_stochastic_physics
+use stochastic_physics,  only : init_stochastic_physics,run_stochastic_physics
 
-use fv_mp_mod,       only: mp_start, domain_decomp
 use atmosphere_stub_mod, only: Atm,atmosphere_init_stub
-use fv_arrays_mod,       only: fv_atmos_type
-use fv_control_stub_mod,      only: setup_pointers
 !use mpp_domains
 use mpp_mod,             only: mpp_set_current_pelist,mpp_get_current_pelist,mpp_init,mpp_pe,mpp_npes ,mpp_declare_pelist
 use mpp_domains_mod,     only: mpp_broadcast_domain,MPP_DOMAIN_TIME,mpp_domains_init ,mpp_domains_set_stack_size
diff --git a/standalone_stochy_module.F90 b/standalone_stochy_module.F90
index c33c71d1..ed2038b6 100644
--- a/standalone_stochy_module.F90
+++ b/standalone_stochy_module.F90
@@ -1,6 +1,6 @@
 module  standalone_stochy_module
 
-use machine
+use kinddef
 implicit none
 public
 integer :: isc,jsc,iec,jec,isd,ied,jsd,jed
diff --git a/stochastic_physics.F90 b/stochastic_physics.F90
index 376bb921..abf1fe72 100644
--- a/stochastic_physics.F90
+++ b/stochastic_physics.F90
@@ -2,6 +2,8 @@
 !! the stochastic physics random pattern generators
 module stochastic_physics
 
+use kinddef, only : kind_dbl_prec
+
 implicit none
 
 private
@@ -16,93 +18,99 @@ module stochastic_physics
 !>@details It reads the stochastic physics namelist (nam_stoch and nam_sfcperts)
 !allocates and polulates the necessary arrays
 
-subroutine init_stochastic_physics(Model, Init_parm, ntasks, nthreads)
+subroutine init_stochastic_physics(levs, blksz, dtp, input_nml_file_in, fn_nml, nlunit, &
+    do_sppt_in, do_shum_in, do_skeb_in, do_sfcperts_in, use_zmtnblck_out, skeb_npass_out,                 &
+    nsfcpert_out, pertz0_out, pertzt_out, pertshc_out, pertlai_out, pertalb_out, pertvegf_out,            &
+    ak, bk, nthreads, mpiroot, mpicomm)
 !\callgraph
-use fv_mp_mod, only : is_master
-use stochy_internal_state_mod
+!use stochy_internal_state_mod
 use stochy_data_mod, only : nshum,rpattern_shum,init_stochdata,rpattern_sppt,nsppt,rpattern_skeb,nskeb,gg_lats,gg_lons,&
                             rad2deg,INTTYP,wlon,rnlat,gis_stochy,vfact_skeb,vfact_sppt,vfact_shum,skeb_vpts,skeb_vwts,sl
 use stochy_resol_def , only : latg,lonf,skeblevs
 use stochy_gg_def,only : colrad_a
 use stochy_namelist_def
-use physcons, only: con_pi
-use spectral_layout_mod,only:me,ompthreads,nodes
-use mpp_mod
-#ifdef STOCHY_UNIT_TEST
- use standalone_stochy_module,   only: GFS_control_type, GFS_init_type
-# else
- use GFS_typedefs,       only: GFS_control_type, GFS_init_type
-#endif
+use spectral_layout_mod,only:me,master,nodes,ompthreads
+use mpi_wrapper, only : mpi_wrapper_initialize,mype,npes,is_master
 
 implicit none
-type(GFS_control_type),   intent(inout) :: Model
-type(GFS_init_type),      intent(in)    :: Init_parm
-integer,                  intent(in)    :: ntasks
-integer,                  intent(in)    :: nthreads
 
+! Interface variables
+
+integer,                  intent(in)    :: levs, nlunit, nthreads, mpiroot, mpicomm
+integer,                  intent(in)    :: blksz(:)
+real(kind=kind_dbl_prec), intent(in)    :: dtp
+character(len=*),         intent(in)    :: input_nml_file_in(:)
+character(len=*),         intent(in)    :: fn_nml
+logical,                  intent(in)    :: do_sppt_in, do_shum_in, do_skeb_in, do_sfcperts_in
+logical,                  intent(out)   :: use_zmtnblck_out
+integer,                  intent(out)   :: skeb_npass_out, nsfcpert_out
+
+real(kind=kind_dbl_prec), intent(out)   :: pertz0_out(:),pertzt_out(:),pertshc_out(:)
+real(kind=kind_dbl_prec), intent(out)   :: pertlai_out(:),pertalb_out(:),pertvegf_out(:)
+real(kind=kind_dbl_prec), intent(in)    :: ak(:), bk(:) 
+
+! Local variables
+real(kind=kind_dbl_prec), parameter     :: con_pi =4.0d0*atan(1.0d0)
 integer :: nblks
 integer :: iret
-real*8 :: PRSI(Model%levs),PRSL(Model%levs),dx
+real*8 :: PRSI(levs),PRSL(levs),dx
 real, allocatable :: skeb_vloc(:)
 integer :: k,kflip,latghf,blk,k2
 character*2::proc
 
-! Set/update shared variables in spectral_layout_mod
-ompthreads  = nthreads
+! Initialize MPI and OpenMP
+call mpi_wrapper_initialize(mpiroot,mpicomm)
+me         = mype
+nodes      = npes
+master     = mpiroot
+ompthreads = nthreads
 
 ! ------------------------------------------
 
-nblks = size(Model%blksz)
+nblks = size(blksz)
 
 ! replace
 rad2deg=180.0/con_pi
 INTTYP=0 ! bilinear interpolation
-me=Model%me
-nodes=ntasks
 gis_stochy%me=me
 gis_stochy%nodes=nodes
-call init_stochdata(Model%levs,Model%dtp,Model%input_nml_file,Model%fn_nml,Init_parm%nlunit,iret)
-! check to see decomposition
-!if(Model%isppt_deep == .true.)then
-!do_sppt = .true.
-!endif
+call init_stochdata(levs,dtp,input_nml_file_in,fn_nml,nlunit,iret)
 ! check namelist entries for consistency
-if (Model%do_sppt.neqv.do_sppt) then
+if (do_sppt_in.neqv.do_sppt) then
    write(0,'(*(a))') 'Logic error in stochastic_physics_init: incompatible', &
                    & ' namelist settings do_sppt and sppt'
    return
-else if (Model%do_shum.neqv.do_shum) then
+else if (do_shum_in.neqv.do_shum) then
    write(0,'(*(a))') 'Logic error in stochastic_physics_init: incompatible', &
                    & ' namelist settings do_shum and shum'
    return
-else if (Model%do_skeb.neqv.do_skeb) then
+else if (do_skeb_in.neqv.do_skeb) then
    write(0,'(*(a))') 'Logic error in stochastic_physics_init: incompatible', &
                    & ' namelist settings do_skeb and skeb'
    return
-else if (Model%do_sfcperts.neqv.do_sfcperts) then ! mg, sfc-perts
+else if (do_sfcperts_in.neqv.do_sfcperts) then ! mg, sfc-perts
    write(0,'(*(a))') 'Logic error in stochastic_physics_init: incompatible', &
                    & ' namelist settings do_sfcperts and pertz0 / pertshc / pertzt / pertlai / pertvegf / pertalb'
    return
 end if
 ! update remaining model configuration parameters from namelist
-Model%use_zmtnblck=use_zmtnblck
-Model%skeb_npass=skeb_npass
-Model%nsfcpert=nsfcpert         ! mg, sfc-perts
-Model%pertz0=pertz0         ! mg, sfc-perts
-Model%pertzt=pertzt         ! mg, sfc-perts
-Model%pertshc=pertshc         ! mg, sfc-perts
-Model%pertlai=pertlai         ! mg, sfc-perts
-Model%pertalb=pertalb         ! mg, sfc-perts
-Model%pertvegf=pertvegf         ! mg, sfc-perts
+use_zmtnblck_out=use_zmtnblck
+skeb_npass_out=skeb_npass
+nsfcpert_out=nsfcpert          ! mg, sfc-perts
+pertz0_out=pertz0              ! mg, sfc-perts
+pertzt_out=pertzt              ! mg, sfc-perts
+pertshc_out=pertshc            ! mg, sfc-perts
+pertlai_out=pertlai            ! mg, sfc-perts
+pertalb_out=pertalb            ! mg, sfc-perts
+pertvegf_out=pertvegf          ! mg, sfc-perts
 if ( (.NOT. do_sppt) .AND. (.NOT. do_shum) .AND. (.NOT. do_skeb)  .AND. (.NOT. do_sfcperts) ) return
-allocate(sl(Model%levs))
-do k=1,Model%levs
-   sl(k)= 0.5*(Init_parm%ak(k)/101300.+Init_parm%bk(k)+Init_parm%ak(k+1)/101300.0+Init_parm%bk(k+1)) ! si are now sigmas
-!   if(is_master())print*,'sl(k)',k,sl(k),Init_parm%ak(k),Init_parm%bk(k)
+allocate(sl(levs))
+do k=1,levs
+   sl(k)= 0.5*(ak(k)/101300.+bk(k)+ak(k+1)/101300.0+bk(k+1)) ! si are now sigmas
 enddo
-if (Model%do_sppt) then
-   allocate(vfact_sppt(Model%levs))
-   do k=1,Model%levs
+if (do_sppt) then
+   allocate(vfact_sppt(levs))
+   do k=1,levs
       if (sl(k) .lt. sppt_sigtop1 .and. sl(k) .gt. sppt_sigtop2) then
          vfact_sppt(k) = (sl(k)-sppt_sigtop2)/(sppt_sigtop1-sppt_sigtop2)
       else if (sl(k) .lt. sppt_sigtop2) then
@@ -116,18 +124,18 @@ subroutine init_stochastic_physics(Model, Init_parm, ntasks, nthreads)
        vfact_sppt(1)=0.0
    endif
    if (is_master()) then
-      do k=1,MOdel%levs
+      do k=1,levs
          print *,'sppt vert profile',k,sl(k),vfact_sppt(k)
       enddo
    endif
 endif
-if (Model%do_skeb) then
+if (do_skeb) then
    !print*,'allocating skeb stuff',skeblevs
-   allocate(vfact_skeb(Model%levs))
+   allocate(vfact_skeb(levs))
    allocate(skeb_vloc(skeblevs)) ! local
-   allocate(skeb_vwts(Model%levs,2)) ! save for later
-   allocate(skeb_vpts(Model%levs,2)) ! save for later
-   do k=1,Model%levs
+   allocate(skeb_vwts(levs,2)) ! save for later
+   allocate(skeb_vpts(levs,2)) ! save for later
+   do k=1,levs
       if (sl(k) .lt. skeb_sigtop1 .and. sl(k) .gt. skeb_sigtop2) then
          vfact_skeb(k) = (sl(k)-skeb_sigtop2)/(skeb_sigtop1-skeb_sigtop2)
       else if (sl(k) .lt. skeb_sigtop2) then
@@ -139,16 +147,16 @@ subroutine init_stochastic_physics(Model, Init_parm, ntasks, nthreads)
    enddo
 ! calculate vertical interpolation weights
    do k=1,skeblevs
-      skeb_vloc(k)=sl(1)-real(k-1)/real(skeblevs-1.0)*(sl(1)-sl(Model%levs))
+      skeb_vloc(k)=sl(1)-real(k-1)/real(skeblevs-1.0)*(sl(1)-sl(levs))
    enddo
 ! surface
 skeb_vwts(1,2)=0
 skeb_vpts(1,1)=1
 ! top
-skeb_vwts(Model%levs,2)=1
-skeb_vpts(Model%levs,1)=skeblevs-2
+skeb_vwts(levs,2)=1
+skeb_vpts(levs,1)=skeblevs-2
 ! internal
-DO k=2,Model%levs-1
+DO k=2,levs-1
    DO k2=1,skeblevs-1
       IF (sl(k) .LE. skeb_vloc(k2) .AND. sl(k) .GT. skeb_vloc(k2+1)) THEN
         skeb_vpts(k,1)=k2
@@ -158,7 +166,7 @@ subroutine init_stochastic_physics(Model, Init_parm, ntasks, nthreads)
 ENDDO
 deallocate(skeb_vloc)
 if (is_master()) then
-DO k=1,Model%levs
+DO k=1,levs
    print*,'skeb vpts ',skeb_vpts(k,1),skeb_vwts(k,2)
 ENDDO
 endif
@@ -166,9 +174,9 @@ subroutine init_stochastic_physics(Model, Init_parm, ntasks, nthreads)
 skeb_vpts(:,2)=skeb_vpts(:,1)+1.0
 endif
 
-if (Model%do_shum) then
-   allocate(vfact_shum(Model%levs))
-   do k=1,Model%levs
+if (do_shum) then
+   allocate(vfact_shum(levs))
+   do k=1,levs
       vfact_shum(k) = exp((sl(k)-1.)/shum_sigefold)
       if (sl(k).LT. 2*shum_sigefold) then
          vfact_shum(k)=0.0
@@ -206,26 +214,29 @@ end subroutine init_stochastic_physics
 !>@details It updates the AR(1) in spectral space
 !allocates and polulates the necessary arrays
 
-subroutine run_stochastic_physics(Model, Grid, Coupling, nthreads)
+subroutine run_stochastic_physics(levs, kdt, phour, blksz, xlat, xlon, sppt_wts, shum_wts, skebu_wts, skebv_wts, nthreads)
+
 !\callgraph
-use fv_mp_mod, only : is_master
-use stochy_internal_state_mod
+!use stochy_internal_state_mod
 use stochy_data_mod, only : nshum,rpattern_shum,rpattern_sppt,nsppt,rpattern_skeb,nskeb,&
                             rad2deg,INTTYP,wlon,rnlat,gis_stochy,vfact_sppt,vfact_shum,vfact_skeb
 use get_stochy_pattern_mod,only : get_random_pattern_fv3,get_random_pattern_fv3_vect,dump_patterns
 use stochy_resol_def , only : latg,lonf
-use stochy_namelist_def
-use spectral_layout_mod,only:me,ompthreads
-use mpp_mod
-#ifdef STOCHY_UNIT_TEST
-use standalone_stochy_module,   only: GFS_control_type, GFS_grid_type, GFS_Coupling_type
-#else
-use GFS_typedefs,       only: GFS_control_type, GFS_grid_type, GFS_Coupling_type
-#endif
+use stochy_namelist_def, only : do_shum,do_sppt,do_skeb,fhstoch,nssppt,nsshum,nsskeb,sppt_logit
+use mpi_wrapper, only: is_master
+use spectral_layout_mod,only:ompthreads
 implicit none
-type(GFS_control_type),   intent(in) :: Model
-type(GFS_grid_type),      intent(in) :: Grid(:)
-type(GFS_coupling_type),  intent(inout) :: Coupling(:)
+
+! Interface variables
+integer,                  intent(in) :: levs, kdt
+real(kind=kind_dbl_prec), intent(in) :: phour
+integer,                  intent(in) :: blksz(:)
+real(kind=kind_dbl_prec), intent(in) :: xlat(:,:)
+real(kind=kind_dbl_prec), intent(in) :: xlon(:,:)
+real(kind=kind_dbl_prec), intent(inout) :: sppt_wts(:,:,:)
+real(kind=kind_dbl_prec), intent(inout) :: shum_wts(:,:,:)
+real(kind=kind_dbl_prec), intent(inout) :: skebu_wts(:,:,:)
+real(kind=kind_dbl_prec), intent(inout) :: skebv_wts(:,:,:)
 integer,                  intent(in)    :: nthreads
 
 real,allocatable :: tmp_wts(:,:),tmpu_wts(:,:,:),tmpv_wts(:,:,:)
@@ -236,54 +247,54 @@ subroutine run_stochastic_physics(Model, Grid, Coupling, nthreads)
 character*120 :: sfile
 character*6   :: STRFH
 
-if ( (.NOT. Model%do_sppt) .AND. (.NOT. Model%do_shum) .AND. (.NOT. Model%do_skeb) ) return
+if ( (.NOT. do_sppt) .AND. (.NOT. do_shum) .AND. (.NOT. do_skeb) ) return
 
 ! Update number of threads in shared variables in spectral_layout_mod and set block-related variables
 ompthreads = nthreads
-nblks = size(Model%blksz)
-maxlen = maxval(Model%blksz(:))
+nblks = size(blksz)
+maxlen = maxval(blksz(:))
 
 ! check to see if it is time to write out random patterns
-if (fhstoch.GE. 0 .AND. MOD(Model%phour,fhstoch) .EQ. 0) then
-   write(STRFH,FMT='(I6.6)') nint(Model%phour)
+if (fhstoch.GE. 0 .AND. MOD(phour,fhstoch) .EQ. 0) then
+   write(STRFH,FMT='(I6.6)') nint(phour)
    sfile='stoch_out.F'//trim(STRFH)
    call dump_patterns(sfile)
 endif
 allocate(tmp_wts(nblks,maxlen))
-allocate(tmpu_wts(nblks,maxlen,Model%levs))
-allocate(tmpv_wts(nblks,maxlen,Model%levs))
-if (Model%do_sppt) then
-   if (mod(Model%kdt,nssppt) == 1 .or. nssppt == 1) then
-      call get_random_pattern_fv3(rpattern_sppt,nsppt,gis_stochy,Model,Grid,nblks,maxlen,tmp_wts)
+allocate(tmpu_wts(nblks,maxlen,levs))
+allocate(tmpv_wts(nblks,maxlen,levs))
+if (do_sppt) then
+   if (mod(kdt,nssppt) == 1 .or. nssppt == 1) then
+      call get_random_pattern_fv3(rpattern_sppt,nsppt,gis_stochy,xlat,xlon,blksz,nblks,maxlen,tmp_wts)
       DO blk=1,nblks
-         len=size(Grid(blk)%xlat,1)
-         DO k=1,Model%levs
-            Coupling(blk)%sppt_wts(:,k)=tmp_wts(blk,1:len)*vfact_sppt(k)
+         len=blksz(blk)
+         DO k=1,levs
+            sppt_wts(blk,1:len,k)=tmp_wts(blk,1:len)*vfact_sppt(k)
          ENDDO
-         if (sppt_logit) Coupling(blk)%sppt_wts(:,:) = (2./(1.+exp(Coupling(blk)%sppt_wts(:,:))))-1.
-          Coupling(blk)%sppt_wts(:,:)= Coupling(blk)%sppt_wts(:,:)+1.0
+         if (sppt_logit) sppt_wts(blk,:,:) = (2./(1.+exp(sppt_wts(blk,:,:))))-1.
+         sppt_wts(blk,:,:) = sppt_wts(blk,:,:)+1.0
       ENDDO
    endif
 endif
-if (Model%do_shum) then
-   if (mod(Model%kdt,nsshum) == 1 .or. nsshum == 1) then
-      call get_random_pattern_fv3(rpattern_shum,nshum,gis_stochy,Model,Grid,nblks,maxlen,tmp_wts)
+if (do_shum) then
+   if (mod(kdt,nsshum) == 1 .or. nsshum == 1) then
+      call get_random_pattern_fv3(rpattern_shum,nshum,gis_stochy,xlat,xlon,blksz,nblks,maxlen,tmp_wts)
       DO blk=1,nblks
-         len=size(Grid(blk)%xlat,1)
-         DO k=1,Model%levs
-            Coupling(blk)%shum_wts(:,k)=tmp_wts(blk,1:len)*vfact_shum(k)
+         len=blksz(blk)
+         DO k=1,levs
+            shum_wts(blk,1:len,k)=tmp_wts(blk,1:len)*vfact_shum(k)
          ENDDO
       ENDDO
    endif
 endif
-if (Model%do_skeb) then
-   if (mod(Model%kdt,nsskeb) == 1 .or. nsskeb == 1) then
-      call get_random_pattern_fv3_vect(rpattern_skeb,nskeb,gis_stochy,Model,Grid,nblks,maxlen,tmpu_wts,tmpv_wts)
+if (do_skeb) then
+   if (mod(kdt,nsskeb) == 1 .or. nsskeb == 1) then
+      call get_random_pattern_fv3_vect(rpattern_skeb,nskeb,gis_stochy,levs,xlat,xlon,blksz,nblks,maxlen,tmpu_wts,tmpv_wts)
       DO blk=1,nblks
-         len=size(Grid(blk)%xlat,1)
-         DO k=1,Model%levs
-            Coupling(blk)%skebu_wts(:,k)=tmpu_wts(blk,1:len,k)*vfact_skeb(k)
-            Coupling(blk)%skebv_wts(:,k)=tmpv_wts(blk,1:len,k)*vfact_skeb(k)
+         len=blksz(blk)
+         DO k=1,levs
+            skebu_wts(blk,1:len,k)=tmpu_wts(blk,1:len,k)*vfact_skeb(k)
+            skebv_wts(blk,1:len,k)=tmpv_wts(blk,1:len,k)*vfact_skeb(k)
          ENDDO
       ENDDO
    endif
@@ -299,6 +310,8 @@ end module stochastic_physics
 
 module stochastic_physics_sfc
 
+use kinddef, only : kind_dbl_prec
+
 implicit none
 
 private
@@ -307,24 +320,22 @@ module stochastic_physics_sfc
 
 contains
 
-subroutine run_stochastic_physics_sfc(Model, Grid, Coupling)
+subroutine run_stochastic_physics_sfc(blksz, xlat, xlon, sfc_wts)
+
 !\callgraph
-use fv_mp_mod, only : is_master
+use mpi_wrapper, only : is_master
 use stochy_internal_state_mod
 use stochy_data_mod, only : rad2deg,INTTYP,wlon,rnlat,gis_stochy, rpattern_sfc,npsfc                      ! mg, sfc-perts
 use get_stochy_pattern_mod,only : get_random_pattern_sfc_fv3                                              ! mg, sfc-perts
 use stochy_resol_def , only : latg,lonf
-use stochy_namelist_def
-!use mpp_mod
-#ifdef STOCHY_UNIT_TEST
-  use standalone_stochy_module,   only: GFS_control_type, GFS_grid_type, GFS_Coupling_type
-#else
-use GFS_typedefs,       only: GFS_control_type, GFS_grid_type, GFS_Coupling_type
-#endif
+use stochy_namelist_def, only : do_sfcperts, nsfcpert
 implicit none
-type(GFS_control_type),   intent(in) :: Model
-type(GFS_grid_type),      intent(in) :: Grid(:)
-type(GFS_coupling_type),  intent(inout) :: Coupling(:)
+
+! Interface variables
+integer,                  intent(in) :: blksz(:)
+real(kind=kind_dbl_prec), intent(in) :: xlat(:,:)
+real(kind=kind_dbl_prec), intent(in) :: xlon(:,:)
+real(kind=kind_dbl_prec), intent(out) :: sfc_wts(:,:,:)
 
 real,allocatable :: tmpsfc_wts(:,:,:)
 !D-grid
@@ -333,21 +344,22 @@ subroutine run_stochastic_physics_sfc(Model, Grid, Coupling)
 integer :: nblks, blk, len, maxlen
 character*120 :: sfile
 character*6   :: STRFH
-if (.NOT. Model%do_sfcperts) return
+
+if (.NOT. do_sfcperts) return
 
 ! Set block-related variables
-nblks = size(Model%blksz)
-maxlen = maxval(Model%blksz(:))
+nblks = size(blksz)
+maxlen = maxval(blksz(:))
 
-allocate(tmpsfc_wts(nblks,maxlen,Model%nsfcpert))  ! mg, sfc-perts
+allocate(tmpsfc_wts(nblks,maxlen,nsfcpert))  ! mg, sfc-perts
 if (is_master()) then
   print*,'In run_stochastic_physics_sfc'
 endif
-call get_random_pattern_sfc_fv3(rpattern_sfc,npsfc,gis_stochy,Model,Grid,nblks,maxlen,tmpsfc_wts)
+call get_random_pattern_sfc_fv3(rpattern_sfc,npsfc,gis_stochy,xlat,xlon,blksz,nblks,maxlen,tmpsfc_wts)
 DO blk=1,nblks
-   len=size(Grid(blk)%xlat,1)
-   DO k=1,Model%nsfcpert
-      Coupling(blk)%sfc_wts(:,k)=tmpsfc_wts(blk,1:len,k)
+   len=blksz(blk)
+   DO k=1,nsfcpert
+      sfc_wts(blk,1:len,k)=tmpsfc_wts(blk,1:len,k)
    ENDDO
 ENDDO
 if (is_master()) then
diff --git a/stochy_data_mod.F90 b/stochy_data_mod.F90
index cc6b78e7..84f46311 100644
--- a/stochy_data_mod.F90
+++ b/stochy_data_mod.F90
@@ -9,7 +9,7 @@ module stochy_data_mod
  use stochy_namelist_def
  use constants_mod, only : radius
  use spectral_layout_mod, only : me, nodes
- use fv_mp_mod, only: mp_bcst, is_master
+ use mpi_wrapper, only: mp_bcst, is_master
  use stochy_patterngenerator_mod, only: random_pattern, patterngenerator_init,&
  getnoise, patterngenerator_advance,ndimspec,chgres_pattern,computevarspec_r
  use initialize_spectral_mod, only: initialize_spectral
diff --git a/stochy_gg_def.f b/stochy_gg_def.f
index 15e82d2e..fd558b2a 100644
--- a/stochy_gg_def.f
+++ b/stochy_gg_def.f
@@ -1,6 +1,6 @@
 !>@brief The module 'stochy_gg_def' declares array defining the gaussian grid attributes
       module stochy_gg_def
-      use machine
+      use kinddef
       implicit none
 
       real(kind=kind_dbl_prec), allocatable, dimension(:) ::  colrad_a,
diff --git a/stochy_internal_state_mod.F90 b/stochy_internal_state_mod.F90
index a98ad2e6..220d29ef 100644
--- a/stochy_internal_state_mod.F90
+++ b/stochy_internal_state_mod.F90
@@ -33,6 +33,7 @@ module stochy_internal_state_mod
       type,public::stochy_internal_state		! start type define
 ! -----------------------------------------------
 
+      ! DH* todo remove - is in spectral_layout?
       integer                   :: me, nodes
       integer                   :: lnt2_s, llgg_s
       integer                   :: lnt2
diff --git a/stochy_layout_lag.f b/stochy_layout_lag.f
index 2eeef66a..c3f54946 100644
--- a/stochy_layout_lag.f
+++ b/stochy_layout_lag.f
@@ -1,6 +1,6 @@
 !>@brief The module 'stochy_layout_lag' contains the decomposition attributes of the gaussian grid
       module stochy_layout_lag
-      use machine
+      use kinddef
       implicit none
       save
 cc
diff --git a/stochy_namelist_def.F90 b/stochy_namelist_def.F90
index 0f8e61a4..91bb9531 100644
--- a/stochy_namelist_def.F90
+++ b/stochy_namelist_def.F90
@@ -6,7 +6,7 @@ module stochy_namelist_def
 ! program log
 ! 11 Oct 2016:    Philip Pegion create standalone stochastic physics
 !
-      use machine
+      use kinddef
       implicit none
 
       public
diff --git a/stochy_patterngenerator.F90 b/stochy_patterngenerator.F90
index 7dfdcc27..ec41d4f3 100644
--- a/stochy_patterngenerator.F90
+++ b/stochy_patterngenerator.F90
@@ -4,11 +4,13 @@ module stochy_patterngenerator_mod
 
 !> generate random patterns with specified temporal and spatial auto-correlation
 !! in spherical harmonic space.
- use machine
+ use kinddef
  use spectral_layout_mod, only: len_trie_ls, len_trio_ls, ls_dim, ls_max_node
 ! use mersenne_twister_stochy, only: random_setseed,random_gauss,random_stat
  use mersenne_twister, only: random_setseed,random_gauss,random_stat
- use fv_mp_mod,only: is_master, mp_bcst
+ ! DH* replacing this with mpi_wrapper changes results - look for value of iseed?
+ use mpi_wrapper,only: is_master, mp_bcst
+ ! *DH
  implicit none
  private
 
diff --git a/sumfln_stochy.f b/sumfln_stochy.f
index f91035e9..f0d063c3 100644
--- a/sumfln_stochy.f
+++ b/sumfln_stochy.f
@@ -16,13 +16,11 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
      &                         lons_lat,londi,latl,nvars_0)
 !
       use stochy_resol_def , only : jcap,latgd
-      use spectral_layout_mod   , only : len_trie_ls,len_trio_ls,
-     &                               ls_dim,ls_max_node,me,nodes
-      use machine
+      use spectral_layout_mod, only : len_trie_ls,len_trio_ls,
+     &                                ls_dim,ls_max_node,me,nodes
+      use kinddef
       use spectral_layout_mod, only : num_parthds_stochy => ompthreads
-      !or : use fv_mp_mod ?
-      use mpp_mod, only: mpp_pe,mpp_npes, mpp_alltoall,
-     &                   mpp_get_current_pelist
+      use mpi_wrapper, only : mp_alltoall
 
       implicit none
 !
@@ -31,8 +29,6 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       integer lat1s(0:jcap),latl2
 !
       integer              nvars,nvars_0
-      integer,                allocatable :: pelist(:)
-      integer :: npes
       real(kind=kind_dbl_prec) flnev(len_trie_ls,2*nvars)
       real(kind=kind_dbl_prec) flnod(len_trio_ls,2*nvars)
 !
@@ -96,10 +92,6 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       arrsz=2*nvars*ls_dim*workdim*nodes
       num_threads     = min(num_parthds_stochy,nvars)
       nvar_thread_max = (nvars+num_threads-1)/num_threads
-      npes = mpp_npes()
-      my_pe=mpp_pe()
-      allocate(pelist(0:npes-1))
-      call mpp_get_current_pelist(pelist)
       kpts   = 0
 !     write(0,*)' londi=',londi,'nvarsdim=',nvarsdim,'workdim=',workdim
 !
@@ -253,8 +245,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       end do
       work1dr(1:arrsz)=>workr
       work1ds(1:arrsz)=>works
-      call mpp_alltoall(work1ds, sendcounts, sdispls,
-     &                  work1dr,recvcounts,sdispls,pelist)
+      call mp_alltoall(work1ds,sendcounts,sdispls,
+     &                 work1dr,recvcounts,sdispls)
       nullify(work1dr)
       nullify(work1ds)
 !$omp parallel do num_threads(num_threads)
diff --git a/update_ca.F90 b/update_ca.F90
index 6aa2f79f..d6450b42 100644
--- a/update_ca.F90
+++ b/update_ca.F90
@@ -1,12 +1,9 @@
 module update_ca
 
-#ifdef STOCHY_UNIT_TEST
-use atmosphere_stub_mod,    only: atmosphere_scalar_field_halo
-#else
-use atmosphere_mod,    only: atmosphere_scalar_field_halo
-#endif
-use mersenne_twister,  only: random_gauss,random_stat,random_number
-use fv_mp_mod, only : mp_reduce_sum,mp_bcst,mp_reduce_min,mp_reduce_max
+use halo_exchange,    only: atmosphere_scalar_field_halo
+use mersenne_twister, only: random_gauss,random_stat,random_number
+use mpi_wrapper,      only: mp_reduce_sum,mp_bcst,mp_reduce_min,mp_reduce_max
+use mpp_domains_mod,  only: domain2D
 
 implicit none
 
@@ -16,12 +13,13 @@ module update_ca
 
 contains
 
-subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,iini,ilives, &
+subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,isc,iec,jsc,jec, &
+                        npx,npy,domain_for_coupler,CA,ca_plumes,iini,ilives,       &
                         nlives,ncells,nfracseed,nseed,nthresh,nspinup,nf,nca_plumes)
 
 implicit none
 
-integer, intent(in) :: kstep,nxc,nyc,nlon,nlat,nxch,nych,nca
+integer, intent(in) :: kstep,nxc,nyc,nlon,nlat,nxch,nych,nca,isc,iec,jsc,jec,npx,npy
 integer, intent(in) :: iini(nxc,nyc,nca)
 integer, intent(inout) :: ilives(nxc,nyc,nca)
 real, intent(out) :: CA(nlon,nlat)
@@ -29,6 +27,7 @@ subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,i
 integer, intent(in) :: nlives, ncells, nseed, nspinup, nf
 real, intent(in) :: nfracseed, nthresh
 logical,intent(in) :: nca_plumes
+type(domain2D), intent(inout) :: domain_for_coupler
 real, dimension(nlon,nlat) :: frac
 integer, dimension(nlon,nlat) :: maxlives
 integer,allocatable,save :: board(:,:,:), lives(:,:,:)
@@ -60,7 +59,7 @@ subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,i
  if (.not. allocated(field_in))then
  allocate(field_in(nxc*nyc,1))
  endif
- if(.not. allocated(board_halo))then                                                                      
+ if(.not. allocated(board_halo))then
  allocate(board_halo(nxch,nych,1))   
  endif
   
@@ -93,7 +92,7 @@ subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,i
    enddo
   endif
 
- !Seed with new CA cells at each nseed step                                                                                                                        
+ !Seed with new CA cells at each nseed step
 
   newseed = 0
   if(mod(kstep,nseed) == 0 .and. kstep >= 2)then
@@ -102,7 +101,7 @@ subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,i
      if(board(i,j,nf) == 0 .and. NOISE_B(i,j)>0.95 )then
        newseed(i,j) = 1
      endif
-     board(i,j,nf) = board(i,j,nf) + newseed(i,j)                                                                                                                                 
+     board(i,j,nf) = board(i,j,nf) + newseed(i,j)
     enddo
    enddo
 
@@ -142,7 +141,8 @@ subroutine update_cells_sgs(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,ca_plumes,i
    enddo
   enddo
 
-  call atmosphere_scalar_field_halo(board_halo,halo,nxch,nych,k_in,field_in)
+  call atmosphere_scalar_field_halo(board_halo,halo,nxch,nych,k_in,field_in, &
+                                    isc,iec,jsc,jec,npx,npy,domain_for_coupler)
 
 
   do j=1,nyc
@@ -287,16 +287,18 @@ end subroutine update_cells_sgs
 
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-subroutine update_cells_global(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,iini_g,ilives_g, &
+subroutine update_cells_global(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,isc,iec,jsc,jec, &
+                        npx,npy,domain_for_coupler,CA,iini_g,ilives_g,                &
                         nlives,ncells,nfracseed,nseed,nthresh,nspinup,nf)
 
 implicit none
 
-integer, intent(in) :: kstep,nxc,nyc,nlon,nlat,nxch,nych,nca
+integer, intent(in) :: kstep,nxc,nyc,nlon,nlat,nxch,nych,nca,isc,iec,jsc,jec,npx,npy
 integer, intent(in) :: iini_g(nxc,nyc,nca), ilives_g(nxc,nyc)
 real, intent(out) :: CA(nlon,nlat)
 integer, intent(in) :: nlives, ncells, nseed, nspinup, nf
 real, intent(in) :: nfracseed, nthresh
+type(domain2D), intent(inout) :: domain_for_coupler
 real, dimension(nlon,nlat) :: frac
 integer,allocatable,save :: board_g(:,:,:), lives_g(:,:,:)
 integer,allocatable :: V(:),L(:)
@@ -390,18 +392,19 @@ subroutine update_cells_global(kstep,nca,nxc,nyc,nxch,nych,nlon,nlat,CA,iini_g,i
  board_halo=0
  field_in=0
 
-!The input to scalar_field_halo needs to be 1D.                                                          
+!The input to scalar_field_halo needs to be 1D.
 !take the updated board_g fields and extract the halo
 ! in order to have updated values in the halo region. 
 
 
-  do j=1,nyc                                                                        
-   do i=1,nxc                                                                                           
-   field_in(i+(j-1)*nxc,1)=board_g(i,j,nf)                                                                
-   enddo                                                                                                
-  enddo                                                                                                  
+  do j=1,nyc
+   do i=1,nxc
+   field_in(i+(j-1)*nxc,1)=board_g(i,j,nf)
+   enddo
+  enddo
 
-  call atmosphere_scalar_field_halo(board_halo,halo,nxch,nych,k_in,field_in)
+  call atmosphere_scalar_field_halo(board_halo,halo,nxch,nych,k_in,field_in, &
+                                    isc,iec,jsc,jec,npx,npy,domain_for_coupler)
 
 
   do j=1,nyc