From 21c655c851bf18241af1a798311e85d4f518ae04 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Thu, 23 Apr 2020 18:44:22 -0400
Subject: [PATCH] +Make wave speed calcs more robust via new options

  Added new options to control the wave speed calculation.  These are set with
optional arguments to wave_speed_init, wave_speed_set_params, wave_speed and
wave_speeds, which are set with the runtime parameters INTERNAL_WAVE_SPEED_TOL,
INTERNAL_WAVE_SPEED_MIN, and INTERNAL_WAVE_SPEED_BETTER_EST.  Also altered the
internal scaling of velocity to make cascading underflows leading to NaNs less
likely.  By default all answers are bitwise identical, but there are three new
runtime parameters and new optional arguments to 4 public interfaces.
---
 src/diagnostics/MOM_diagnostics.F90           |  17 +
 src/diagnostics/MOM_wave_speed.F90            | 478 +++++++++++++-----
 .../lateral/MOM_lateral_mixing_coeffs.F90     |  23 +-
 3 files changed, 382 insertions(+), 136 deletions(-)

diff --git a/src/diagnostics/MOM_diagnostics.F90 b/src/diagnostics/MOM_diagnostics.F90
index ca9ad28b62..77739f3ead 100644
--- a/src/diagnostics/MOM_diagnostics.F90
+++ b/src/diagnostics/MOM_diagnostics.F90
@@ -1451,6 +1451,10 @@ subroutine MOM_diagnostics_init(MIS, ADp, CDp, Time, G, GV, US, param_file, diag
 # include "version_variable.h"
   character(len=40)  :: mdl = "MOM_diagnostics" ! This module's name.
   character(len=48) :: thickness_units, flux_units
+  real :: wave_speed_min      ! A floor in the first mode speed below which 0 is returned [L T-1 ~> m s-1]
+  real :: wave_speed_tol      ! The fractional tolerance for finding the wave speeds [nondim]
+  logical :: better_speed_est ! If true, use a more robust estimate of the first
+                              ! mode wave speed as the starting point for iterations.
   logical :: use_temperature, adiabatic
   logical :: default_2018_answers, remap_answers_2018
   integer :: isd, ied, jsd, jed, IsdB, IedB, JsdB, JedB, nz, nkml, nkbl
@@ -1483,6 +1487,16 @@ subroutine MOM_diagnostics_init(MIS, ADp, CDp, Time, G, GV, US, param_file, diag
                  "The depth below which N2 is limited as monotonic for the "// &
                  "purposes of calculating the equivalent barotropic wave speed.", &
                  units='m', scale=US%m_to_Z, default=-1.)
+  call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_TOL", wave_speed_tol, &
+                 "The fractional tolerance for finding the wave speeds.", &
+                 units="nondim", default=0.001)
+  !### Set defaults so that wave_speed_min*wave_speed_tol >= 1e-9 m s-1
+  call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_MIN", wave_speed_min, &
+                 "A floor in the first mode speed below which 0 used instead.", &
+                 units="m s-1", default=0.0, scale=US%m_s_to_L_T)
+  call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_BETTER_EST", better_speed_est, &
+                 "If true, use a more robust estimate of the first mode wave speed as the "//&
+                 "starting point for iterations.", default=.false.) !### Change the default.
   call get_param(param_file, mdl, "DEFAULT_2018_ANSWERS", default_2018_answers, &
                  "This sets the default value for the various _2018_ANSWERS parameters.", &
                  default=.true.)
@@ -1701,6 +1715,9 @@ subroutine MOM_diagnostics_init(MIS, ADp, CDp, Time, G, GV, US, param_file, diag
   if ((CS%id_cg1>0) .or. (CS%id_Rd1>0) .or. (CS%id_cfl_cg1>0) .or. &
       (CS%id_cfl_cg1_x>0) .or. (CS%id_cfl_cg1_y>0) .or. &
       (CS%id_cg_ebt>0) .or. (CS%id_Rd_ebt>0) .or. (CS%id_p_ebt>0)) then
+    call wave_speed_init(CS%wave_speed_CSp, remap_answers_2018=remap_answers_2018, &
+                         better_speed_est=better_speed_est, min_speed=wave_speed_min, &
+                         wave_speed_tol=wave_speed_tol)
     call wave_speed_init(CS%wave_speed_CSp, remap_answers_2018=remap_answers_2018)
     call safe_alloc_ptr(CS%cg1,isd,ied,jsd,jed)
     if (CS%id_Rd1>0)       call safe_alloc_ptr(CS%Rd1,isd,ied,jsd,jed)
diff --git a/src/diagnostics/MOM_wave_speed.F90 b/src/diagnostics/MOM_wave_speed.F90
index c955c4eb95..65a23e0fa2 100644
--- a/src/diagnostics/MOM_wave_speed.F90
+++ b/src/diagnostics/MOM_wave_speed.F90
@@ -30,6 +30,8 @@ module MOM_wave_speed
                                        !! of the first baroclinic wave speed.
                                        !! This parameter controls the default behavior of wave_speed() which
                                        !! can be overridden by optional arguments.
+  logical :: better_cg1_est = .false.  !< If true, use an improved estimate of the first mode
+                                       !! internal wave speed.
   real :: mono_N2_column_fraction = 0. !< The lower fraction of water column over which N2 is limited as
                                        !! monotonic for the purposes of calculating the equivalent barotropic
                                        !! wave speed. This parameter controls the default behavior of
@@ -38,6 +40,9 @@ module MOM_wave_speed
                                        !! calculating the equivalent barotropic wave speed [Z ~> m].
                                        !! This parameter controls the default behavior of wave_speed() which
                                        !! can be overridden by optional arguments.
+  real :: min_speed2 = 0.              !< The minimum mode 1 internal wave speed squared [L2 T-2 ~> m2 s-2]
+  real :: wave_speed_tol = 0.001       !< The fractional tolerance with which to solve for the wave
+                                       !! speeds [nondim]
   type(remapping_CS) :: remapping_CS   !< Used for vertical remapping when calculating equivalent barotropic
                                        !! mode structure.
   logical :: remap_answers_2018 = .true.  !< If true, use the order of arithmetic and expressions that
@@ -49,8 +54,8 @@ module MOM_wave_speed
 contains
 
 !> Calculates the wave speed of the first baroclinic mode.
-subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
-                      mono_N2_column_fraction, mono_N2_depth, modal_structure)
+subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, mono_N2_column_fraction, &
+                      mono_N2_depth, modal_structure, better_speed_est, min_speed, wave_speed_tol)
   type(ocean_grid_type),            intent(in)  :: G  !< Ocean grid structure
   type(verticalGrid_type),          intent(in)  :: GV !< Vertical grid structure
   type(unit_scale_type),            intent(in)  :: US !< A dimensional unit scaling type
@@ -59,18 +64,24 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
   type(thermo_var_ptrs),            intent(in)  :: tv !< Thermodynamic variables
   real, dimension(SZI_(G),SZJ_(G)), intent(out) :: cg1 !< First mode internal wave speed [L T-1 ~> m s-1]
   type(wave_speed_CS),              pointer     :: CS !< Control structure for MOM_wave_speed
-  logical, optional,                intent(in)  :: full_halos !< If true, do the calculation
+  logical,                optional, intent(in)  :: full_halos !< If true, do the calculation
                                           !! over the entire computational domain.
-  logical, optional,                intent(in)  :: use_ebt_mode !< If true, use the equivalent
+  logical,                optional, intent(in)  :: use_ebt_mode !< If true, use the equivalent
                                           !! barotropic mode instead of the first baroclinic mode.
-  real, optional,                   intent(in)  :: mono_N2_column_fraction !< The lower fraction
+  real,                   optional, intent(in)  :: mono_N2_column_fraction !< The lower fraction
                                           !! of water column over which N2 is limited as monotonic
                                           !! for the purposes of calculating vertical modal structure.
-  real, optional,                   intent(in)  :: mono_N2_depth !< A depth below which N2 is limited as
+  real,                   optional, intent(in)  :: mono_N2_depth !< A depth below which N2 is limited as
                                           !! monotonic for the purposes of calculating vertical
                                           !! modal structure [Z ~> m].
   real, dimension(SZI_(G),SZJ_(G),SZK_(G)), &
-        optional,                   intent(out) :: modal_structure !< Normalized model structure [nondim]
+                          optional, intent(out) :: modal_structure !< Normalized model structure [nondim]
+  logical, optional, intent(in) :: better_speed_est !< If true, use a more robust estimate of the first
+                                     !! mode speed as the starting point for iterations.
+  real,    optional, intent(in) :: min_speed !< If present, set a floor in the first mode speed
+                                     !! below which 0 is returned [L T-1 ~> m s-1].
+  real,    optional, intent(in) :: wave_speed_tol !< The fractional tolerance for finding the
+                                     !! wave speeds [nondim]
 
   ! Local variables
   real, dimension(SZK_(G)+1) :: &
@@ -79,6 +90,8 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
     pres, &       ! Interface pressure [R L2 T-2 ~> Pa]
     T_int, &      ! Temperature interpolated to interfaces [degC]
     S_int, &      ! Salinity interpolated to interfaces [ppt]
+    H_top, &      ! The distance of each filtered interface from the ocean surface [Z ~> m]
+    H_bot, &      ! The distance of each filtered interface from the bottom [Z ~> m]
     gprime        ! The reduced gravity across each interface [L2 Z-1 T-2 ~> m s-2].
   real, dimension(SZK_(G)) :: &
     Igl, Igu, Igd ! The inverse of the reduced gravity across an interface times
@@ -95,6 +108,7 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
     Sc, &         ! A column of layer salinites after convective istabilities are removed [ppt]
     Rc, &         ! A column of layer densities after convective istabilities are removed [R ~> kg m-3]
     Hc_H          ! Hc(:) rescaled from Z to thickness units [H ~> m or kg m-2]
+  real :: I_Htot  ! The inverse of the total filtered thicknesses [Z ~> m]
   real :: det, ddet, detKm1, detKm2, ddetKm1, ddetKm2
   real :: lam     ! The eigenvalue [T2 L-2 ~> s m-1]
   real :: dlam    ! The change in estimates of the eigenvalue [T2 L-2 ~> s m-1]
@@ -108,22 +122,28 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
     HxS_here, &    ! A layer integrated salinity [ppt Z ~> ppt m]
     HxR_here       ! A layer integrated density [R Z ~> kg m-2]
   real :: speed2_tot ! overestimate of the mode-1 speed squared [L2 T-2 ~> m2 s-2]
+  real :: cg1_min2 ! A floor in the squared first mode speed below which 0 is returned [L2 T-2 ~> m2 s-2]
   real :: I_Hnew   ! The inverse of a new layer thickness [Z-1 ~> m-1]
-  real :: drxh_sum ! The sum of density diffrences across interfaces times thicknesses [R Z ~> kg m-2]
+  real :: drxh_sum ! The sum of density differences across interfaces times thicknesses [R Z ~> kg m-2]
   real :: L2_to_Z2 ! A scaling factor squared from units of lateral distances to depths [Z2 L-2 ~> 1].
-  real, parameter :: tol1  = 0.0001, tol2 = 0.001
   real, pointer, dimension(:,:,:) :: T => NULL(), S => NULL()
-  real :: g_Rho0  ! G_Earth/Rho0 [L2 T-2 Z-1 R-1 ~> m4 s-2 kg-1].
+  real :: g_Rho0   ! G_Earth/Rho0 [L2 T-2 Z-1 R-1 ~> m4 s-2 kg-1].
   real :: c2_scale ! A scaling factor for wave speeds to help control the growth of the determinant
                    ! and its derivative with lam between rows of the Thomas algorithm solver.  The
                    ! exact value should not matter for the final result if it is an even power of 2.
+  real :: tol_Hfrac ! Layers that together are smaller than this fraction of
+                    ! the total water column can be merged for efficiency.
+  real :: tol_solve ! The fractional tolerance with which to solve for the wave speeds [nondim]
+  real :: tol_merge ! The fractional change in estimated wave speed that is allowed
+                    ! when deciding to merge layers in the calculation [nondim]
   real :: rescale, I_rescale
-  integer :: kf(SZI_(G))
+  integer :: kf(SZI_(G)) ! The number of active layers after filtering.
   integer, parameter :: max_itt = 10
   real :: lam_it(max_itt), det_it(max_itt), ddet_it(max_itt)
-  logical :: use_EOS    ! If true, density is calculated from T & S using an
-                        ! equation of state.
-  integer :: kc
+  logical :: use_EOS    ! If true, density is calculated from T & S using an equation of state.
+  logical :: better_est ! If true, use an improved estimate of the first mode internal wave speed.
+  logical :: merge      ! If true, merge the current layer with the one above.
+  integer :: kc         ! The number of layers in the column after merging
   integer :: i, j, k, k2, itt, is, ie, js, je, nz
   real :: hw, sum_hc
   real :: gp      ! A limited local copy of gprime [L2 Z-1 T-2 ~> m s-2]
@@ -162,22 +182,38 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
   Z_to_pres = GV%Z_to_H * (GV%H_to_RZ * GV%g_Earth)
   use_EOS = associated(tv%eqn_of_state)
 
+  better_est = CS%better_cg1_est ; if (present(better_speed_est)) better_est = better_speed_est
+
+  if (better_est) then
+    tol_solve = CS%wave_speed_tol ; if (present(wave_speed_tol)) tol_solve = wave_speed_tol
+    tol_Hfrac  = 0.1*tol_solve ; tol_merge = tol_solve / real(nz)
+  else
+    tol_solve = 0.001 ; tol_Hfrac  = 0.0001 ; tol_merge = 0.001
+  endif
+
+  ! The rescaling below can control the growth of the determinant provided that
+  ! (tol_merge*cg1_min2/c2_scale > I_rescale).  For default values, this suggests a stable lower
+  ! bound on min_speed of sqrt(nz/(tol_solve*rescale)) or 3e2/1024**2 = 2.9e-4 m/s for 90 layers.
+  ! The upper bound on the rate of increase in the determinant is g'H/c2_scale < rescale or in the
+  ! worst possible oceanic case of g'H < 0.5*10m/s2*1e4m = 5.e4 m2/s2 < 1024**2*c2_scale, suggesting
+  ! that c2_scale can safely be set to 1/(16*1024**2), which would decrease the stable floor on
+  ! min_speed to ~6.9e-8 m/s for 90 layers or 2.33e-7 m/s for 1000 layers.
+  cg1_min2 = CS%min_speed2 ; if (present(min_speed)) cg1_min2 = min_speed**2
   rescale = 1024.0**4 ; I_rescale = 1.0/rescale
-  ! The following two lines give identical results:
-  ! c2_scale = 16.0 * US%m_s_to_L_T**2
-  c2_scale = US%m_s_to_L_T**2
+  c2_scale = US%m_s_to_L_T**2 / 4096.0**2 ! Other powers of 2 give identical results.
 
-  min_h_frac = tol1 / real(nz)
+  min_h_frac = tol_Hfrac / real(nz)
 !$OMP parallel do default(none) shared(is,ie,js,je,nz,h,G,GV,US,min_h_frac,use_EOS,T,S,tv,&
 !$OMP                                  calc_modal_structure,l_use_ebt_mode,modal_structure, &
 !$OMP                                  l_mono_N2_column_fraction,l_mono_N2_depth,CS,   &
-!$OMP                                  Z_to_pres,cg1,g_Rho0,rescale,I_rescale,L2_to_Z2,c2_scale) &
+!$OMP                                  Z_to_pres,cg1,g_Rho0,rescale,I_rescale,L2_to_Z2, &
+!$OMP                                  better_est,cg1_min2,tol_merge,tol_solve,c2_scale) &
 !$OMP                          private(htot,hmin,kf,H_here,HxT_here,HxS_here,HxR_here, &
-!$OMP                                  Hf,Tf,Sf,Rf,pres,T_int,S_int,drho_dT,           &
-!$OMP                                  drho_dS,drxh_sum,kc,Hc,Hc_H,Tc,Sc,I_Hnew,gprime,&
+!$OMP                                  Hf,Tf,Sf,Rf,pres,T_int,S_int,drho_dT,drho_dS,   &
+!$OMP                                  drxh_sum,kc,Hc,Hc_H,tC,sc,I_Hnew,gprime,&
 !$OMP                                  Rc,speed2_tot,Igl,Igu,Igd,lam0,lam,lam_it,dlam, &
 !$OMP                                  mode_struct,sum_hc,N2min,gp,hw,                 &
-!$OMP                                  ms_min,ms_max,ms_sq,                            &
+!$OMP                                  ms_min,ms_max,ms_sq,H_top,H_bot,I_Htot,merge,   &
 !$OMP                                  det,ddet,detKm1,ddetKm1,detKm2,ddetKm2,det_it,ddet_it)
   do j=js,je
     !   First merge very thin layers with the one above (or below if they are
@@ -232,52 +268,85 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
       endif ; enddo
     endif
 
-    ! From this point, we can work on individual columns without causing memory
-    ! to have page faults.
+    ! From this point, we can work on individual columns without causing memory to have page faults.
     do i=is,ie ; if (G%mask2dT(i,j) > 0.5) then
       if (use_EOS) then
-        pres(1) = 0.0
-        do k=2,kf(i)
-          pres(k) = pres(k-1) + Z_to_pres*Hf(k-1,i)
-          T_int(k) = 0.5*(Tf(k,i)+Tf(k-1,i))
-          S_int(k) = 0.5*(Sf(k,i)+Sf(k-1,i))
+        pres(1) = 0.0 ; H_top(1) = 0.0
+        do K=2,kf(i)
+          pres(K) = pres(K-1) + Z_to_pres*Hf(k-1,i)
+          T_int(K) = 0.5*(Tf(k,i)+Tf(k-1,i))
+          S_int(K) = 0.5*(Sf(k,i)+Sf(k-1,i))
+          H_top(K) = H_top(K-1) + Hf(k-1,i)
         enddo
         call calculate_density_derivs(T_int, S_int, pres, drho_dT, drho_dS, &
                                       tv%eqn_of_state, (/2,kf(i)/) )
 
-        ! Sum the reduced gravities to find out how small a density difference
-        ! is negligibly small.
+        ! Sum the reduced gravities to find out how small a density difference is negligibly small.
         drxh_sum = 0.0
-        do k=2,kf(i)
-          drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
-              max(0.0,drho_dT(k)*(Tf(k,i)-Tf(k-1,i)) + &
-                      drho_dS(k)*(Sf(k,i)-Sf(k-1,i)))
-        enddo
+        if (better_est) then
+          ! This is an estimate that is correct for the non-EBT mode for 2 or 3 layers, or for
+          ! clusters of massless layers at interfaces that can be grouped into 2 or 3 layers.
+          ! For a uniform stratification and a huge number of layers uniformly distributed in
+          ! density, this estimate is too large (as is desired) by a factor of pi^2/6 ~= 1.64.
+          if (H_top(kf(i)) > 0.0) then
+            I_Htot = 1.0 / (H_top(kf(i)) + Hf(kf(i),i))  ! = 1.0 / (H_top(K) + H_bot(K)) for all K.
+            H_bot(kf(i)+1) = 0.0
+            do K=kf(i),2,-1
+              H_bot(K) = H_bot(K+1) + Hf(k,i)
+              drxh_sum = drxh_sum + ((H_top(K) * H_bot(K)) * I_Htot) * &
+                  max(0.0, drho_dT(K)*(Tf(k,i)-Tf(k-1,i)) + drho_dS(K)*(Sf(k,i)-Sf(k-1,i)))
+            enddo
+          endif
+        else
+          ! This estimate is problematic in that it goes like 1/nz for a large number of layers,
+          ! but it is an overestimate (as desired) for a small number of layers, by at a factor
+          ! of (H1+H2)**2/(H1*H2) >= 4 for two thick layers.
+          do K=2,kf(i)
+            drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
+                max(0.0, drho_dT(K)*(Tf(k,i)-Tf(k-1,i)) + drho_dS(K)*(Sf(k,i)-Sf(k-1,i)))
+          enddo
+        endif
       else
         drxh_sum = 0.0
-        do k=2,kf(i)
-          drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
-                            max(0.0,Rf(k,i)-Rf(k-1,i))
-        enddo
-      endif
-
-      if (calc_modal_structure) then
-        mode_struct(:) = 0.
+        if (better_est) then
+          H_top(1) = 0.0
+          do K=2,kf(i) ; H_top(K) = H_top(K-1) + Hf(k-1,i) ; enddo
+          if (H_top(kf(i)) > 0.0) then
+            I_Htot = 1.0 / (H_top(kf(i)) + Hf(kf(i),i))  ! = 1.0 / (H_top(K) + H_bot(K)) for all K.
+            H_bot(kf(i)+1) = 0.0
+            do K=kf(i),2,-1
+              H_bot(K) = H_bot(K+1) + Hf(k,i)
+              drxh_sum = drxh_sum + ((H_top(K) * H_bot(K)) * I_Htot) * max(0.0,Rf(k,i)-Rf(k-1,i))
+            enddo
+          endif
+        else
+          do K=2,kf(i)
+            drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * max(0.0,Rf(k,i)-Rf(k-1,i))
+          enddo
+        endif
       endif
 
-  !   Find gprime across each internal interface, taking care of convective
-  ! instabilities by merging layers.
-      if (drxh_sum <= 0.0) then
+      !   Find gprime across each internal interface, taking care of convective instabilities by
+      ! merging layers.  If the estimated wave speed is too small, simply return zero.
+      if (g_Rho0 * drxh_sum <= cg1_min2) then
         cg1(i,j) = 0.0
+        if (present(modal_structure)) modal_structure(i,j,:) = 0.
       else
         ! Merge layers to eliminate convective instabilities or exceedingly
-        ! small reduced gravities.
+        ! small reduced gravities.  Merging layers reduces the estimated wave speed by
+        ! (rho(2)-rho(1))*h(1)*h(2) / H_tot.
         if (use_EOS) then
           kc = 1
           Hc(1) = Hf(1,i) ; Tc(1) = Tf(1,i) ; Sc(1) = Sf(1,i)
           do k=2,kf(i)
-            if ((drho_dT(k)*(Tf(k,i)-Tc(kc)) + drho_dS(k)*(Sf(k,i)-Sc(kc))) * &
-                (Hc(kc) + Hf(k,i)) < 2.0 * tol2*drxh_sum) then
+            if (better_est) then
+              merge = ((drho_dT(K)*(Tf(k,i)-Tc(kc)) + drho_dS(K)*(Sf(k,i)-Sc(kc))) * &
+                       ((Hc(kc) * Hf(k,i))*I_Htot) < 2.0 * tol_merge*drxh_sum)
+            else
+              merge = ((drho_dT(K)*(Tf(k,i)-Tc(kc)) + drho_dS(K)*(Sf(k,i)-Sc(kc))) * &
+                       (Hc(kc) + Hf(k,i)) < 2.0 * tol_merge*drxh_sum)
+            endif
+            if (merge) then
               ! Merge this layer with the one above and backtrack.
               I_Hnew = 1.0 / (Hc(kc) + Hf(k,i))
               Tc(kc) = (Hc(kc)*Tc(kc) + Hf(k,i)*Tf(k,i)) * I_Hnew
@@ -286,9 +355,15 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
               ! Backtrack to remove any convective instabilities above...  Note
               ! that the tolerance is a factor of two larger, to avoid limit how
               ! far back we go.
-              do k2=kc,2,-1
-                if ((drho_dT(k2)*(Tc(k2)-Tc(k2-1)) + drho_dS(k2)*(Sc(k2)-Sc(k2-1))) * &
-                    (Hc(k2) + Hc(k2-1)) < tol2*drxh_sum) then
+              do K2=kc,2,-1
+                if (better_est) then
+                  merge = ((drho_dT(K2)*(Tc(k2)-Tc(k2-1)) + drho_dS(K2)*(Sc(k2)-Sc(k2-1))) * &
+                           ((Hc(k2) * Hc(k2-1))*I_Htot) < tol_merge*drxh_sum)
+                else
+                  merge = ((drho_dT(K2)*(Tc(k2)-Tc(k2-1)) + drho_dS(K2)*(Sc(k2)-Sc(k2-1))) * &
+                           (Hc(k2) + Hc(k2-1)) < tol_merge*drxh_sum)
+                endif
+                if (merge) then
                   ! Merge the two bottommost layers.  At this point kc = k2.
                   I_Hnew = 1.0 / (Hc(kc) + Hc(kc-1))
                   Tc(kc-1) = (Hc(kc)*Tc(kc) + Hc(kc-1)*Tc(kc-1)) * I_Hnew
@@ -300,21 +375,25 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
             else
               ! Add a new layer to the column.
               kc = kc + 1
-              drho_dS(kc) = drho_dS(k) ; drho_dT(kc) = drho_dT(k)
+              drho_dS(Kc) = drho_dS(K) ; drho_dT(Kc) = drho_dT(K)
               Tc(kc) = Tf(k,i) ; Sc(kc) = Sf(k,i) ; Hc(kc) = Hf(k,i)
             endif
           enddo
           ! At this point there are kc layers and the gprimes should be positive.
-          do k=2,kc ! Revisit this if non-Boussinesq.
-            gprime(k) = g_Rho0 * (drho_dT(k)*(Tc(k)-Tc(k-1)) + &
-                                  drho_dS(k)*(Sc(k)-Sc(k-1)))
+          do K=2,kc ! Revisit this if non-Boussinesq.
+            gprime(K) = g_Rho0 * (drho_dT(K)*(Tc(k)-Tc(k-1)) + drho_dS(K)*(Sc(k)-Sc(k-1)))
           enddo
         else  ! .not.use_EOS
           ! Do the same with density directly...
           kc = 1
           Hc(1) = Hf(1,i) ; Rc(1) = Rf(1,i)
           do k=2,kf(i)
-            if ((Rf(k,i) - Rc(kc)) * (Hc(kc) + Hf(k,i)) < 2.0*tol2*drxh_sum) then
+            if (better_est) then
+              merge = ((Rf(k,i) - Rc(kc)) * ((Hc(kc) * Hf(k,i))*I_Htot) < 2.0*tol_merge*drxh_sum)
+            else
+              merge = ((Rf(k,i) - Rc(kc)) * (Hc(kc) + Hf(k,i)) < 2.0*tol_merge*drxh_sum)
+            endif
+            if (merge) then
               ! Merge this layer with the one above and backtrack.
               Rc(kc) = (Hc(kc)*Rc(kc) + Hf(k,i)*Rf(k,i)) / (Hc(kc) + Hf(k,i))
               Hc(kc) = (Hc(kc) + Hf(k,i))
@@ -322,7 +401,12 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
               ! that the tolerance is a factor of two larger, to avoid limit how
               ! far back we go.
               do k2=kc,2,-1
-                if ((Rc(k2)-Rc(k2-1)) * (Hc(k2)+Hc(k2-1)) < tol2*drxh_sum) then
+                if (better_est) then
+                  merge = ((Rc(k2)-Rc(k2-1)) * ((Hc(k2) * Hc(k2-1))*I_Htot) < tol_merge*drxh_sum)
+                else
+                  merge = ((Rc(k2)-Rc(k2-1)) * (Hc(k2)+Hc(k2-1)) < tol_merge*drxh_sum)
+                endif
+                if (merge) then
                   ! Merge the two bottommost layers.  At this point kc = k2.
                   Rc(kc-1) = (Hc(kc)*Rc(kc) + Hc(kc-1)*Rc(kc-1)) / (Hc(kc) + Hc(kc-1))
                   Hc(kc-1) = (Hc(kc) + Hc(kc-1))
@@ -336,8 +420,8 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
             endif
           enddo
           ! At this point there are kc layers and the gprimes should be positive.
-          do k=2,kc ! Revisit this if non-Boussinesq.
-            gprime(k) = g_Rho0 * (Rc(k)-Rc(k-1))
+          do K=2,kc ! Revisit this if non-Boussinesq.
+            gprime(K) = g_Rho0 * (Rc(k)-Rc(k-1))
           enddo
         endif  ! use_EOS
 
@@ -346,6 +430,13 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
         ! non-leading diagonals of the tridiagonal matrix.
         if (kc >= 2) then
           speed2_tot = 0.0
+          if (better_est) then
+            H_top(1) = 0.0 ; H_bot(kc+1) = 0.0
+            do K=2,kc+1 ; H_top(K) = H_top(K-1) + Hc(k-1) ; enddo
+            do K=kc,2,-1 ; H_bot(K) = H_bot(K+1) + Hc(k) ; enddo
+            I_Htot = 0.0 ; if (H_top(kc+1) > 0.0) I_Htot = 1.0 / H_top(kc+1)
+          endif
+
           if (l_use_ebt_mode) then
             Igu(1) = 0. ! Neumann condition for pressure modes
             sum_hc = Hc(1)
@@ -366,23 +457,33 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
               endif
               Igu(k) = 1.0/(gp*Hc(k))
               Igl(k-1) = 1.0/(gp*Hc(k-1))
-              speed2_tot = speed2_tot + gprime(k)*(Hc(k-1)+Hc(k))*0.707
               sum_hc = sum_hc + Hc(k)
+              if (better_est) then
+                ! Estimate that the ebt_mode is sqrt(2) times the speed of the flat bottom modes.
+                speed2_tot = speed2_tot + 2.0 * gprime(K)*((H_top(K) * H_bot(K)) * I_Htot)
+              else ! The ebt_mode wave should be faster than the flat-bottom mode, so 0.707 should be > 1?
+                speed2_tot = speed2_tot + gprime(K)*(Hc(k-1)+Hc(k))*0.707
+              endif
             enddo
            !Igl(kc) = 0. ! Neumann condition for pressure modes
             Igl(kc) = 2.*Igu(kc) ! Dirichlet condition for pressure modes
           else ! .not. l_use_ebt_mode
             do K=2,kc
               Igl(K) = 1.0/(gprime(K)*Hc(k)) ; Igu(K) = 1.0/(gprime(K)*Hc(k-1))
-              speed2_tot = speed2_tot + gprime(k)*(Hc(k-1)+Hc(k))
+              if (better_est) then
+                speed2_tot = speed2_tot + gprime(K)*((H_top(K) * H_bot(K)) * I_Htot)
+              else
+                speed2_tot = speed2_tot + gprime(K)*(Hc(k-1)+Hc(k))
+              endif
             enddo
           endif
 
           if (calc_modal_structure) then
+            mode_struct(:) = 0.
             mode_struct(1:kc) = 1. ! Uniform flow, first guess
           endif
 
-          ! Overestimate the speed to start with.
+          ! Under estimate the first eigenvalue (overestimate the speed) to start with.
           if (calc_modal_structure) then
             lam0 = 0.5 / speed2_tot ; lam = lam0
           else
@@ -417,7 +518,7 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
               ! of the matrix are
               !    /  b(2)-lam  igl(2)      0       0     0  ...  |
               !    |  igu(3)  b(3)-lam   igl(3)     0     0  ...  |
-              !    |    0       igu43)  b(4)-lam  igl(4)  0  ...  |
+              !    |    0       igu(4)  b(4)-lam  igl(4)  0  ...  |
               ! which is consistent if the eigenvalue problem is for vertical velocity modes.
               detKm1 = 1.0 ; ddetKm1 = 0.0
               det = (Igu(2) + Igl(2) - lam) ; ddet = -1.0
@@ -479,7 +580,7 @@ subroutine wave_speed(h, tv, G, GV, US, cg1, CS, full_halos, use_ebt_mode, &
               endif
             endif
 
-            if (abs(dlam) < tol2*lam) exit
+            if (abs(dlam) < tol_solve*lam) exit
           enddo
 
           cg1(i,j) = 0.0
@@ -567,17 +668,24 @@ subroutine tdma6(n, a, b, c, lam, y)
 end subroutine tdma6
 
 !> Calculates the wave speeds for the first few barolinic modes.
-subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
-  type(ocean_grid_type),                    intent(in)  :: G  !< Ocean grid structure
+subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos, better_speed_est, &
+                       min_speed, wave_speed_tol)
+  type(ocean_grid_type),                    intent(in)  :: G !< Ocean grid structure
   type(verticalGrid_type),                  intent(in)  :: GV !< Vertical grid structure
   type(unit_scale_type),                    intent(in)  :: US !< A dimensional unit scaling type
-  real, dimension(SZI_(G),SZJ_(G),SZK_(G)), intent(in)  :: h  !< Layer thickness [H ~> m or kg m-2]
+  real, dimension(SZI_(G),SZJ_(G),SZK_(G)), intent(in)  :: h !< Layer thickness [H ~> m or kg m-2]
   type(thermo_var_ptrs),                    intent(in)  :: tv !< Thermodynamic variables
   integer,                                  intent(in)  :: nmodes !< Number of modes
   real, dimension(G%isd:G%ied,G%jsd:G%jed,nmodes), intent(out) :: cn !< Waves speeds [L T-1 ~> m s-1]
   type(wave_speed_CS), optional,            pointer     :: CS !< Control structure for MOM_wave_speed
   logical,             optional,            intent(in)  :: full_halos !< If true, do the calculation
                                                                       !! over the entire computational domain.
+  logical, optional, intent(in) :: better_speed_est !< If true, use a more robust estimate of the first
+                                     !! mode speed as the starting point for iterations.
+  real,    optional, intent(in) :: min_speed !< If present, set a floor in the first mode speed
+                                     !! below which 0 is returned [L T-1 ~> m s-1].
+  real,    optional, intent(in) :: wave_speed_tol !< The fractional tolerance for finding the
+                                     !! wave speeds [nondim]
   ! Local variables
   real, dimension(SZK_(G)+1) :: &
     dRho_dT, &    ! Partial derivative of density with temperature [R degC-1 ~> kg m-3 degC-1]
@@ -585,6 +693,8 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
     pres, &       ! Interface pressure [R L2 T-2 ~> Pa]
     T_int, &      ! Temperature interpolated to interfaces [degC]
     S_int, &      ! Salinity interpolated to interfaces [ppt]
+    H_top, &      ! The distance of each filtered interface from the ocean surface [Z ~> m]
+    H_bot, &      ! The distance of each filtered interface from the bottom [Z ~> m]
     gprime        ! The reduced gravity across each interface [L2 Z-1 T-2 ~> m s-2].
   real, dimension(SZK_(G)) :: &
     Igl, Igu      ! The inverse of the reduced gravity across an interface times
@@ -603,8 +713,12 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
     Tc, &         ! A column of layer temperatures after convective istabilities are removed [degC]
     Sc, &         ! A column of layer salinites after convective istabilities are removed [ppt]
     Rc            ! A column of layer densities after convective istabilities are removed [R ~> kg m-3]
+  real :: I_Htot  ! The inverse of the total filtered thicknesses [Z ~> m]
   real :: c1_thresh  ! if c1 is below this value, don't bother calculating
                      ! cn values for higher modes [L T-1 ~> m s-1]
+  real :: c2_scale ! A scaling factor for wave speeds to help control the growth of the determinant
+                   ! and its derivative with lam between rows of the Thomas algorithm solver.  The
+                   ! exact value should not matter for the final result if it is an even power of 2.
   real :: det, ddet       ! determinant & its derivative of eigen system
   real :: lam_1           ! approximate mode-1 eigenvalue [T2 L-2 ~> s2 m-2]
   real :: lam_n           ! approximate mode-n eigenvalue [T2 L-2 ~> s2 m-2]
@@ -631,16 +745,23 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
     HxR_here       ! A layer integrated density [R Z ~> kg m-2]
   real :: speed2_tot ! overestimate of the mode-1 speed squared [L2 T-2 ~> m2 s-2]
   real :: speed2_min ! minimum mode speed (squared) to consider in root searching [L2 T-2 ~> m2 s-2]
+  real :: cg1_min2 ! A floor in the squared first mode speed below which 0 is returned [L2 T-2 ~> m2 s-2]
   real, parameter :: reduct_factor = 0.5
                      ! factor used in setting speed2_min [nondim]
   real :: I_Hnew   ! The inverse of a new layer thickness [Z-1 ~> m-1]
-  real :: drxh_sum ! The sum of density diffrences across interfaces times thicknesses [R Z ~> kg m-2]
-  real, parameter :: tol1  = 0.0001, tol2 = 0.001
+  real :: drxh_sum ! The sum of density differences across interfaces times thicknesses [R Z ~> kg m-2]
   real, pointer, dimension(:,:,:) :: T => NULL(), S => NULL()
-  real :: g_Rho0  ! G_Earth/Rho0 [L2 T-2 Z-1 R-1 ~> m4 s-2 kg-1].
-  integer :: kf(SZI_(G))
+  real :: g_Rho0   ! G_Earth/Rho0 [L2 T-2 Z-1 R-1 ~> m4 s-2 kg-1].
+  real :: tol_Hfrac  ! Layers that together are smaller than this fraction of
+                     ! the total water column can be merged for efficiency.
+  real :: tol_solve  ! The fractional tolerance with which to solve for the wave speeds [nondim].
+  real :: tol_merge  ! The fractional change in estimated wave speed that is allowed
+                     ! when deciding to merge layers in the calculation [nondim]
+  integer :: kf(SZI_(G)) ! The number of active layers after filtering.
   integer, parameter :: max_itt = 10
   logical :: use_EOS    ! If true, density is calculated from T & S using the equation of state.
+  logical :: better_est ! If true, use an improved estimate of the first mode internal wave speed.
+  logical :: merge      ! If true, merge the current layer with the one above.
   real, dimension(SZK_(G)+1) :: z_int
   ! real, dimension(SZK_(G)+1) :: N2  ! The local squared buoyancy frequency [T-2 ~> s-2]
   integer :: nsub       ! number of subintervals used for root finding
@@ -648,8 +769,8 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
                         ! maximum number of times to subdivide interval
                         ! for root finding (# intervals = 2**sub_it_max)
   logical :: sub_rootfound ! if true, subdivision has located root
-  integer :: kc, nrows
-  integer :: sub, sub_it
+  integer :: kc         ! The number of layers in the column after merging
+  integer :: nrows, sub, sub_it
   integer :: i, j, k, k2, itt, is, ie, js, je, nz, row, iint, m, ig, jg
 
   is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = G%ke
@@ -669,8 +790,21 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
   ! Simplifying the following could change answers at roundoff.
   Z_to_pres = GV%Z_to_H * (GV%H_to_RZ * GV%g_Earth)
   c1_thresh = 0.01*US%m_s_to_L_T
+  c2_scale = US%m_s_to_L_T**2 / 4096.0**2 ! Other powers of 2 give identical results.
+
+  better_est = .false. ; if (present(CS)) better_est = CS%better_cg1_est
+  if (present(better_speed_est)) better_est = better_speed_est
+  if (better_est) then
+    tol_solve = 0.001 ; if (present(CS)) tol_solve = CS%wave_speed_tol
+    if (present(wave_speed_tol)) tol_solve = wave_speed_tol
+    tol_Hfrac  = 0.1*tol_solve ; tol_merge = tol_solve / real(nz)
+  else
+    tol_Hfrac  = 0.0001 ; tol_solve = 0.001 ; tol_merge = 0.001
+  endif
+  cg1_min2 = 0.0 ; if (present(CS)) cg1_min2 = CS%min_speed2
+  if (present(min_speed)) cg1_min2 = min_speed**2
 
-  min_h_frac = tol1 / real(nz)
+  min_h_frac = tol_Hfrac / real(nz)
   !$OMP parallel do default(private) shared(is,ie,js,je,nz,h,G,GV,US,min_h_frac,use_EOS,T,S, &
   !$OMP                                     Z_to_pres,tv,cn,g_Rho0,nmodes)
   do j=js,je
@@ -726,48 +860,85 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
       endif ; enddo
     endif
 
-    ! From this point, we can work on individual columns without causing memory
-    ! to have page faults.
+    ! From this point, we can work on individual columns without causing memory to have page faults.
     do i=is,ie
       if (G%mask2dT(i,j) > 0.5) then
         if (use_EOS) then
-          pres(1) = 0.0
-          do k=2,kf(i)
-            pres(k) = pres(k-1) + Z_to_pres*Hf(k-1,i)
-            T_int(k) = 0.5*(Tf(k,i)+Tf(k-1,i))
-            S_int(k) = 0.5*(Sf(k,i)+Sf(k-1,i))
+          pres(1) = 0.0 ; H_top(1) = 0.0
+          do K=2,kf(i)
+            pres(K) = pres(K-1) + Z_to_pres*Hf(k-1,i)
+            T_int(K) = 0.5*(Tf(k,i)+Tf(k-1,i))
+            S_int(K) = 0.5*(Sf(k,i)+Sf(k-1,i))
+            H_top(K) = H_top(K-1) + Hf(k-1,i)
           enddo
           call calculate_density_derivs(T_int, S_int, pres, drho_dT, drho_dS, &
                                         tv%eqn_of_state, (/2,kf(i)/) )
 
-          ! Sum the reduced gravities to find out how small a density difference
-          ! is negligibly small.
+          ! Sum the reduced gravities to find out how small a density difference is negligibly small.
           drxh_sum = 0.0
-          do k=2,kf(i)
-            drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
-                max(0.0,drho_dT(k)*(Tf(k,i)-Tf(k-1,i)) + &
-                        drho_dS(k)*(Sf(k,i)-Sf(k-1,i)))
-          enddo
+          if (better_est) then
+            ! This is an estimate that is correct for the non-EBT mode for 2 or 3 layers, or for
+            ! clusters of massless layers at interfaces that can be grouped into 2 or 3 layers.
+            ! For a uniform stratification and a huge number of layers uniformly distributed in
+            ! density, this estimate is too large (as is desired) by a factor of pi^2/6 ~= 1.64.
+            if (H_top(kf(i)) > 0.0) then
+              I_Htot = 1.0 / (H_top(kf(i)) + Hf(kf(i),i))  ! = 1.0 / (H_top(K) + H_bot(K)) for all K.
+              H_bot(kf(i)+1) = 0.0
+              do K=kf(i),2,-1
+                H_bot(K) = H_bot(K+1) + Hf(k,i)
+                drxh_sum = drxh_sum + ((H_top(K) * H_bot(K)) * I_Htot) * &
+                    max(0.0, drho_dT(K)*(Tf(k,i)-Tf(k-1,i)) + drho_dS(K)*(Sf(k,i)-Sf(k-1,i)))
+              enddo
+            endif
+          else
+            ! This estimate is problematic in that it goes like 1/nz for a large number of layers,
+            ! but it is an overestimate (as desired) for a small number of layers, by at a factor
+            ! of (H1+H2)**2/(H1*H2) >= 4 for two thick layers.
+            do K=2,kf(i)
+              drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
+                  max(0.0, drho_dT(K)*(Tf(k,i)-Tf(k-1,i)) + drho_dS(K)*(Sf(k,i)-Sf(k-1,i)))
+            enddo
+          endif
         else
           drxh_sum = 0.0
-          do k=2,kf(i)
-            drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * &
-                              max(0.0,Rf(k,i)-Rf(k-1,i))
-          enddo
+          if (better_est) then
+            H_top(1) = 0.0
+            do K=2,kf(i) ; H_top(K) = H_top(K-1) + Hf(k-1,i) ; enddo
+              if (H_top(kf(i)) > 0.0) then
+              I_Htot = 1.0 / (H_top(kf(i)) + Hf(kf(i),i))  ! = 1.0 / (H_top(K) + H_bot(K)) for all K.
+              H_bot(kf(i)+1) = 0.0
+              do K=kf(i),2,-1
+                H_bot(K) = H_bot(K+1) + Hf(k,i)
+                drxh_sum = drxh_sum + ((H_top(K) * H_bot(K)) * I_Htot) * max(0.0,Rf(k,i)-Rf(k-1,i))
+              enddo
+            endif
+          else
+            do K=2,kf(i)
+              drxh_sum = drxh_sum + 0.5*(Hf(k-1,i)+Hf(k,i)) * max(0.0,Rf(k,i)-Rf(k-1,i))
+            enddo
+          endif
         endif
-    !   Find gprime across each internal interface, taking care of convective
-    ! instabilities by merging layers.
-        if (drxh_sum <= 0.0) then
+
+        !   Find gprime across each internal interface, taking care of convective
+        ! instabilities by merging layers.
+        if (g_Rho0 * drxh_sum <= cg1_min2) then
           cn(i,j,:) = 0.0
         else
           ! Merge layers to eliminate convective instabilities or exceedingly
-          ! small reduced gravities.
+          ! small reduced gravities.  Merging layers reduces the estimated wave speed by
+          ! (rho(2)-rho(1))*h(1)*h(2) / H_tot.
           if (use_EOS) then
             kc = 1
             Hc(1) = Hf(1,i) ; Tc(1) = Tf(1,i) ; Sc(1) = Sf(1,i)
             do k=2,kf(i)
-              if ((drho_dT(k)*(Tf(k,i)-Tc(kc)) + drho_dS(k)*(Sf(k,i)-Sc(kc))) * &
-                  (Hc(kc) + Hf(k,i)) < 2.0 * tol2*drxh_sum) then
+              if (better_est) then
+                merge = ((drho_dT(K)*(Tf(k,i)-Tc(kc)) + drho_dS(K)*(Sf(k,i)-Sc(kc))) * &
+                         ((Hc(kc) * Hf(k,i))*I_Htot) < 2.0 * tol_merge*drxh_sum)
+              else
+                merge = ((drho_dT(K)*(Tf(k,i)-Tc(kc)) + drho_dS(K)*(Sf(k,i)-Sc(kc))) * &
+                         (Hc(kc) + Hf(k,i)) < 2.0 * tol_merge*drxh_sum)
+              endif
+              if (merge) then
                 ! Merge this layer with the one above and backtrack.
                 I_Hnew = 1.0 / (Hc(kc) + Hf(k,i))
                 Tc(kc) = (Hc(kc)*Tc(kc) + Hf(k,i)*Tf(k,i)) * I_Hnew
@@ -776,9 +947,15 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
                 ! Backtrack to remove any convective instabilities above...  Note
                 ! that the tolerance is a factor of two larger, to avoid limit how
                 ! far back we go.
-                do k2=kc,2,-1
-                  if ((drho_dT(k2)*(Tc(k2)-Tc(k2-1)) + drho_dS(k2)*(Sc(k2)-Sc(k2-1))) * &
-                      (Hc(k2) + Hc(k2-1)) < tol2*drxh_sum) then
+                do K2=kc,2,-1
+                  if (better_est) then
+                    merge = ((drho_dT(K2)*(Tc(k2)-Tc(k2-1)) + drho_dS(K2)*(Sc(k2)-Sc(k2-1))) * &
+                             ((Hc(k2) * Hc(k2-1))*I_Htot) < tol_merge*drxh_sum)
+                  else
+                    merge = ((drho_dT(K2)*(Tc(k2)-Tc(k2-1)) + drho_dS(K2)*(Sc(k2)-Sc(k2-1))) * &
+                             (Hc(k2) + Hc(k2-1)) < tol_merge*drxh_sum)
+                  endif
+                  if (merge) then
                     ! Merge the two bottommost layers.  At this point kc = k2.
                     I_Hnew = 1.0 / (Hc(kc) + Hc(kc-1))
                     Tc(kc-1) = (Hc(kc)*Tc(kc) + Hc(kc-1)*Tc(kc-1)) * I_Hnew
@@ -790,21 +967,25 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
               else
                 ! Add a new layer to the column.
                 kc = kc + 1
-                drho_dS(kc) = drho_dS(k) ; drho_dT(kc) = drho_dT(k)
+                drho_dS(Kc) = drho_dS(K) ; drho_dT(Kc) = drho_dT(K)
                 Tc(kc) = Tf(k,i) ; Sc(kc) = Sf(k,i) ; Hc(kc) = Hf(k,i)
               endif
             enddo
             ! At this point there are kc layers and the gprimes should be positive.
-            do k=2,kc ! Revisit this if non-Boussinesq.
-              gprime(k) = g_Rho0 * (drho_dT(k)*(Tc(k)-Tc(k-1)) + &
-                                    drho_dS(k)*(Sc(k)-Sc(k-1)))
+            do K=2,kc ! Revisit this if non-Boussinesq.
+              gprime(K) = g_Rho0 * (drho_dT(K)*(Tc(k)-Tc(k-1)) + drho_dS(K)*(Sc(k)-Sc(k-1)))
             enddo
           else  ! .not.use_EOS
             ! Do the same with density directly...
             kc = 1
             Hc(1) = Hf(1,i) ; Rc(1) = Rf(1,i)
             do k=2,kf(i)
-              if ((Rf(k,i) - Rc(kc)) * (Hc(kc) + Hf(k,i)) < 2.0*tol2*drxh_sum) then
+              if (better_est) then
+                merge = ((Rf(k,i) - Rc(kc)) * ((Hc(kc) * Hf(k,i))*I_Htot) < 2.0 * tol_merge*drxh_sum)
+              else
+                merge = ((Rf(k,i) - Rc(kc)) * (Hc(kc) + Hf(k,i)) < 2.0*tol_merge*drxh_sum)
+              endif
+              if (merge) then
                 ! Merge this layer with the one above and backtrack.
                 Rc(kc) = (Hc(kc)*Rc(kc) + Hf(k,i)*Rf(k,i)) / (Hc(kc) + Hf(k,i))
                 Hc(kc) = (Hc(kc) + Hf(k,i))
@@ -812,7 +993,12 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
                 ! that the tolerance is a factor of two larger, to avoid limit how
                 ! far back we go.
                 do k2=kc,2,-1
-                  if ((Rc(k2)-Rc(k2-1)) * (Hc(k2)+Hc(k2-1)) < tol2*drxh_sum) then
+                  if (better_est) then
+                    merge = ((Rc(k2)-Rc(k2-1)) * ((Hc(kc) * Hc(k2-1))*I_Htot) < tol_merge*drxh_sum)
+                  else
+                    merge = ((Rc(k2)-Rc(k2-1)) * (Hc(k2)+Hc(k2-1)) < tol_merge*drxh_sum)
+                  endif
+                  if (merge) then
                     ! Merge the two bottommost layers.  At this point kc = k2.
                     Rc(kc-1) = (Hc(kc)*Rc(kc) + Hc(kc-1)*Rc(kc-1)) / (Hc(kc) + Hc(kc-1))
                     Hc(kc-1) = (Hc(kc) + Hc(kc-1))
@@ -826,8 +1012,8 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
               endif
             enddo
             ! At this point there are kc layers and the gprimes should be positive.
-            do k=2,kc ! Revisit this if non-Boussinesq.
-              gprime(k) = g_Rho0 * (Rc(k)-Rc(k-1))
+            do K=2,kc ! Revisit this if non-Boussinesq.
+              gprime(K) = g_Rho0 * (Rc(k)-Rc(k-1))
             enddo
           endif  ! use_EOS
 
@@ -840,13 +1026,24 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
             z_int(1) = 0.0
             ! initialize speed2_tot
             speed2_tot = 0.0
+            if (better_est) then
+              H_top(1) = 0.0 ; H_bot(kc+1) = 0.0
+              do K=2,kc+1 ; H_top(K) = H_top(K-1) + Hc(k-1) ; enddo
+              do K=kc,2,-1 ; H_bot(K) = H_bot(K+1) + Hc(k) ; enddo
+              I_Htot = 0.0 ; if (H_top(kc+1) > 0.0) I_Htot = 1.0 / H_top(kc+1)
+            endif
+
             ! Calculate Igu, Igl, depth, and N2 at each interior interface
             ! [excludes surface (K=1) and bottom (K=kc+1)]
             do K=2,kc
               Igl(K) = 1.0/(gprime(K)*Hc(k)) ; Igu(K) = 1.0/(gprime(K)*Hc(k-1))
               z_int(K) = z_int(K-1) + Hc(k-1)
               ! N2(K) = US%L_to_Z**2*gprime(K)/(0.5*(Hc(k)+Hc(k-1)))
-              speed2_tot = speed2_tot + gprime(K)*(Hc(k-1)+Hc(k))
+              if (better_est) then
+                speed2_tot = speed2_tot + gprime(K)*((H_top(K) * H_bot(K)) * I_Htot)
+              else
+                speed2_tot = speed2_tot + gprime(K)*(Hc(k-1)+Hc(k))
+              endif
             enddo
             ! Set stratification for surface and bottom (setting equal to nearest interface for now)
             ! N2(1) = N2(2) ; N2(kc+1) = N2(kc)
@@ -878,14 +1075,14 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
             ! Total number of rows in the matrix = number of interior interfaces
             nrows = kc-1
 
-            ! Under estimate the first eigenvalue to start with.
+            ! Under estimate the first eigenvalue (overestimate the speed) to start with.
             lam_1 = 1.0 / speed2_tot
 
-            ! Find the first eigenvalue
+            ! Find the first eigen value
             do itt=1,max_itt
               ! calculate the determinant of (A-lam_1*I)
               call tridiag_det(a_diag(1:nrows),b_diag(1:nrows),c_diag(1:nrows), &
-                                      nrows,lam_1,det,ddet, row_scale=US%m_s_to_L_T**2)
+                                      nrows,lam_1,det,ddet, row_scale=c2_scale)
               ! Use Newton's method iteration to find a new estimate of lam_1
               !det = det_it(itt) ; ddet = ddet_it(itt)
               if ((ddet >= 0.0) .or. (-det > -0.5*lam_1*ddet)) then
@@ -896,7 +1093,7 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
               else  ! Newton's method is OK.
                 dlam = - det / ddet
                 lam_1 = lam_1 + dlam
-                if (abs(dlam) < tol2*lam_1) then
+                if (abs(dlam) < tol_solve*lam_1) then
                   ! calculate 1st mode speed
                   if (lam_1 > 0.0) cn(i,j,1) = 1.0 / sqrt(lam_1)
                   exit
@@ -904,12 +1101,12 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
               endif
             enddo
 
-            ! Find other eigenvalues if c1 is of significant magnitude, > cn_thresh
+            ! Find other eigen values if c1 is of significant magnitude, > cn_thresh
             nrootsfound = 0    ! number of extra roots found (not including 1st root)
             if (nmodes>1 .and. kc>=nmodes+1 .and. cn(i,j,1)>c1_thresh) then
-              ! Set the the range to look for the other desired eigenvalues
+              ! Set the the range to look for the other desired eigen values
               ! set min value just greater than the 1st root (found above)
-              lamMin = lam_1*(1.0 + tol2)
+              lamMin = lam_1*(1.0 + tol_solve)
               ! set max value based on a low guess at wavespeed for highest mode
               speed2_min = (reduct_factor*cn(i,j,1)/real(nmodes))**2
               lamMax = 1.0 / speed2_min
@@ -923,13 +1120,13 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
 
               ! find det_l of first interval (det at left endpoint)
               call tridiag_det(a_diag(1:nrows),b_diag(1:nrows),c_diag(1:nrows), &
-                               nrows,lamMin,det_l,ddet_l, row_scale=US%m_s_to_L_T**2)
+                               nrows,lamMin,det_l,ddet_l, row_scale=c2_scale)
               ! move interval window looking for zero-crossings************************
               do iint=1,numint
                 xr = lamMin + lamInc * iint
                 xl = xr - lamInc
                 call tridiag_det(a_diag(1:nrows),b_diag(1:nrows),c_diag(1:nrows), &
-                                 nrows,xr,det_r,ddet_r, row_scale=US%m_s_to_L_T**2)
+                                 nrows,xr,det_r,ddet_r, row_scale=c2_scale)
                 if (det_l*det_r < 0.0) then  ! if function changes sign
                   if (det_l*ddet_l < 0.0) then ! if function at left is headed to zero
                     nrootsfound = nrootsfound + 1
@@ -950,7 +1147,7 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
                       do sub=1,nsub-1,2 ! only check odds; sub = 1; 1,3; 1,3,5,7;...
                         xl_sub = xl + lamInc/(nsub)*sub
                         call tridiag_det(a_diag(1:nrows),b_diag(1:nrows),c_diag(1:nrows), &
-                                 nrows,xl_sub,det_sub,ddet_sub, row_scale=US%m_s_to_L_T**2)
+                                 nrows,xl_sub,det_sub,ddet_sub, row_scale=c2_scale)
                         if (det_sub*det_r < 0.0) then  ! if function changes sign
                           if (det_sub*ddet_sub < 0.0) then ! if function at left is headed to zero
                             sub_rootfound = .true.
@@ -993,11 +1190,11 @@ subroutine wave_speeds(h, tv, G, GV, US, nmodes, cn, CS, full_halos)
                 do itt=1,max_itt
                   ! calculate the determinant of (A-lam_n*I)
                   call tridiag_det(a_diag(1:nrows),b_diag(1:nrows),c_diag(1:nrows), &
-                                   nrows,lam_n,det,ddet, row_scale=US%m_s_to_L_T**2)
+                                   nrows,lam_n,det,ddet, row_scale=c2_scale)
                   ! Use Newton's method to find a new estimate of lam_n
                   dlam = - det / ddet
                   lam_n = lam_n + dlam
-                  if (abs(dlam) < tol2*lam_1) then
+                  if (abs(dlam) < tol_solve*lam_1) then
                     ! calculate nth mode speed
                     if (lam_n > 0.0) cn(i,j,m+1) = 1.0 / sqrt(lam_n)
                     exit
@@ -1036,7 +1233,7 @@ subroutine tridiag_det(a, b, c, nrows, lam, det_out, ddet_out, row_scale)
   ! Local variables
   real, dimension(nrows) :: det ! value of recursion function
   real, dimension(nrows) :: ddet ! value of recursion function for derivative
-  real, parameter:: rescale = 1024.0**4 ! max value of determinant allowed before rescaling
+  real, parameter :: rescale = 1024.0**4 ! max value of determinant allowed before rescaling
   real :: rscl
   real :: I_rescale ! inverse of rescale
   integer :: n      ! row (layer interface) index
@@ -1068,7 +1265,8 @@ subroutine tridiag_det(a, b, c, nrows, lam, det_out, ddet_out, row_scale)
 end subroutine tridiag_det
 
 !> Initialize control structure for MOM_wave_speed
-subroutine wave_speed_init(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_depth, remap_answers_2018)
+subroutine wave_speed_init(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_depth, remap_answers_2018, &
+                           better_speed_est, min_speed, wave_speed_tol)
   type(wave_speed_CS), pointer :: CS !< Control structure for MOM_wave_speed
   logical, optional, intent(in) :: use_ebt_mode  !< If true, use the equivalent
                                      !! barotropic mode instead of the first baroclinic mode.
@@ -1079,9 +1277,14 @@ subroutine wave_speed_init(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_de
                                      !! as monotonic for the purposes of calculating the
                                      !! vertical modal structure [Z ~> m].
   logical, optional, intent(in) :: remap_answers_2018 !< If true, use the order of arithmetic and expressions
-                                      !! that recover the remapping answers from 2018.  Otherwise
-                                      !! use more robust but mathematically equivalent expressions.
-
+                                     !! that recover the remapping answers from 2018.  Otherwise
+                                     !! use more robust but mathematically equivalent expressions.
+  logical, optional, intent(in) :: better_speed_est !< If true, use a more robust estimate of the first
+                                     !! mode speed as the starting point for iterations.
+  real,    optional, intent(in) :: min_speed !< If present, set a floor in the first mode speed
+                                     !! below which 0 is returned [L T-1 ~> m s-1].
+  real,    optional, intent(in) :: wave_speed_tol !< The fractional tolerance for finding the
+                                     !! wave speeds [nondim]
 
   ! This include declares and sets the variable "version".
 # include "version_variable.h"
@@ -1096,7 +1299,8 @@ subroutine wave_speed_init(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_de
   ! Write all relevant parameters to the model log.
   call log_version(mdl, version)
 
-  call wave_speed_set_param(CS, use_ebt_mode=use_ebt_mode, mono_N2_column_fraction=mono_N2_column_fraction)
+  call wave_speed_set_param(CS, use_ebt_mode=use_ebt_mode, mono_N2_column_fraction=mono_N2_column_fraction, &
+                            better_speed_est=better_speed_est, min_speed=min_speed, wave_speed_tol=wave_speed_tol)
 
   call initialize_remapping(CS%remapping_CS, 'PLM', boundary_extrapolation=.false., &
                             answers_2018=CS%remap_answers_2018)
@@ -1104,7 +1308,8 @@ subroutine wave_speed_init(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_de
 end subroutine wave_speed_init
 
 !> Sets internal parameters for MOM_wave_speed
-subroutine wave_speed_set_param(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_depth, remap_answers_2018)
+subroutine wave_speed_set_param(CS, use_ebt_mode, mono_N2_column_fraction, mono_N2_depth, remap_answers_2018, &
+                                better_speed_est, min_speed, wave_speed_tol)
   type(wave_speed_CS), pointer  :: CS !< Control structure for MOM_wave_speed
   logical, optional, intent(in) :: use_ebt_mode  !< If true, use the equivalent
                                       !! barotropic mode instead of the first baroclinic mode.
@@ -1117,6 +1322,12 @@ subroutine wave_speed_set_param(CS, use_ebt_mode, mono_N2_column_fraction, mono_
   logical, optional, intent(in) :: remap_answers_2018 !< If true, use the order of arithmetic and expressions
                                       !! that recover the remapping answers from 2018.  Otherwise
                                       !! use more robust but mathematically equivalent expressions.
+  logical, optional, intent(in) :: better_speed_est !< If true, use a more robust estimate of the first
+                                     !! mode speed as the starting point for iterations.
+  real,    optional, intent(in) :: min_speed !< If present, set a floor in the first mode speed
+                                     !! below which 0 is returned [L T-1 ~> m s-1].
+  real,    optional, intent(in) :: wave_speed_tol !< The fractional tolerance for finding the
+                                     !! wave speeds [nondim]
 
   if (.not.associated(CS)) call MOM_error(FATAL, &
      "wave_speed_set_param called with an associated control structure.")
@@ -1125,6 +1336,9 @@ subroutine wave_speed_set_param(CS, use_ebt_mode, mono_N2_column_fraction, mono_
   if (present(mono_N2_column_fraction)) CS%mono_N2_column_fraction = mono_N2_column_fraction
   if (present(mono_N2_depth)) CS%mono_N2_depth = mono_N2_depth
   if (present(remap_answers_2018)) CS%remap_answers_2018 = remap_answers_2018
+  if (present(better_speed_est)) CS%better_cg1_est = better_speed_est
+  if (present(min_speed)) CS%min_speed2 = min_speed**2
+  if (present(wave_speed_tol)) CS%wave_speed_tol = wave_speed_tol
 
 end subroutine wave_speed_set_param
 
@@ -1151,7 +1365,7 @@ end subroutine wave_speed_set_param
 !!
 !! Here
 !! \verbatim
-!!   Igl(k) = 1.0/(gprime(k)*h(k)) ; Igu(k) = 1.0/(gprime(k)*h(k-1))
+!!   Igl(k) = 1.0/(gprime(K)*h(k)) ; Igu(k) = 1.0/(gprime(K)*h(k-1))
 !! \endverbatim
 !!
 !! Alternately, these same eigenvalues can be found from the second smallest
diff --git a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
index e5e699ebee..37e549f3f1 100644
--- a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
+++ b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
@@ -602,8 +602,7 @@ subroutine calc_Visbeck_coeffs(h, slope_x, slope_y, N2_u, N2_v, G, GV, US, CS)
   endif
 
   if (CS%debug) then
-    call uvchksum("calc_Visbeck_coeffs slope_[xy]", slope_x, slope_y, G%HI, &
-                  haloshift=1)
+    call uvchksum("calc_Visbeck_coeffs slope_[xy]", slope_x, slope_y, G%HI, haloshift=1)
     call uvchksum("calc_Visbeck_coeffs N2_u, N2_v", N2_u, N2_v, G%HI, &
                   scale=US%s_to_T**2, scalar_pair=.true.)
     call uvchksum("calc_Visbeck_coeffs SN_[uv]", CS%SN_u, CS%SN_v, G%HI, &
@@ -926,8 +925,12 @@ subroutine VarMix_init(Time, G, GV, US, param_file, diag, CS)
   real :: Leith_Lap_const      ! The non-dimensional coefficient in the Leith viscosity
   real :: grid_sp_u2, grid_sp_v2 ! Intermediate quantities for Leith metrics [L2 ~> m2]
   real :: grid_sp_u3, grid_sp_v3 ! Intermediate quantities for Leith metrics [L3 ~> m3]
+  real :: wave_speed_min      ! A floor in the first mode speed below which 0 is returned [L T-1 ~> m s-1]
+  real :: wave_speed_tol      ! The fractional tolerance for finding the wave speeds [nondim]
+  logical :: better_speed_est ! If true, use a more robust estimate of the first
+                              ! mode wave speed as the starting point for iterations.
 ! This include declares and sets the variable "version".
-#include "version_variable.h"
+# include "version_variable.h"
   character(len=40)  :: mdl = "MOM_lateral_mixing_coeffs" ! This module's name.
   integer :: is, ie, js, je, Isq, Ieq, Jsq, Jeq, i, j
   integer :: isd, ied, jsd, jed, IsdB, IedB, JsdB, JedB
@@ -1250,8 +1253,20 @@ subroutine VarMix_init(Time, G, GV, US, param_file, diag, CS)
                  "If true, use the order of arithmetic and expressions that recover the "//&
                  "answers from the end of 2018.  Otherwise, use updated and more robust "//&
                  "forms of the same expressions.", default=default_2018_answers)
+    call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_TOL", wave_speed_tol, &
+                 "The fractional tolerance for finding the wave speeds.", &
+                 units="nondim", default=0.001)
+    !### Set defaults so that wave_speed_min*wave_speed_tol >= 1e-9 m s-1
+    call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_MIN", wave_speed_min, &
+                 "A floor in the first mode speed below which 0 used instead.", &
+                 units="m s-1", default=0.0, scale=US%m_s_to_L_T)
+    call get_param(param_file, mdl, "INTERNAL_WAVE_SPEED_BETTER_EST", better_speed_est, &
+                 "If true, use a more robust estimate of the first mode wave speed as the "//&
+                 "starting point for iterations.", default=.false.) !### Change the default.
     call wave_speed_init(CS%wave_speed_CSp, use_ebt_mode=CS%Resoln_use_ebt, &
-                         mono_N2_depth=N2_filter_depth, remap_answers_2018=remap_answers_2018)
+                         mono_N2_depth=N2_filter_depth, remap_answers_2018=remap_answers_2018, &
+                         better_speed_est=better_speed_est, min_speed=wave_speed_min, &
+                         wave_speed_tol=wave_speed_tol)
   endif
 
   ! Leith parameters