diff --git a/src/ice_shelf/MOM_ice_shelf_dynamics.F90 b/src/ice_shelf/MOM_ice_shelf_dynamics.F90
index 56f01be873..ab0b1db776 100644
--- a/src/ice_shelf/MOM_ice_shelf_dynamics.F90
+++ b/src/ice_shelf/MOM_ice_shelf_dynamics.F90
@@ -41,6 +41,11 @@ module MOM_ice_shelf_dynamics
 public shelf_advance_front, ice_shelf_min_thickness_calve, calve_to_mask, volume_above_floatation
 public masked_var_grounded
 
+! SSA inner solver flags
+integer, parameter :: INNER_CG = 1       !< Conjugate gradient (default)
+integer, parameter :: INNER_MINRES = 2   !< MINRES
+integer, parameter :: INNER_CR = 3       !< Conjugate residual
+
 ! A note on unit descriptions in comments: MOM6 uses units that can be rescaled for dimensional
 ! consistency testing. These are noted in comments with units like Z, H, L, and T, along with
 ! their mks counterparts with notation like "a velocity [Z T-1 ~> m s-1]".  If the units
@@ -255,6 +260,9 @@ module MOM_ice_shelf_dynamics
                                   !! and use reproducible sums
   logical :: doing_newton = .false. !< If true, the outer iteration is using Newton (tangent) linearization
                                     !! instead of Picard (secant) linearization for the ice viscosity
+  integer :: inner_solver !< The inner linear solver: INNER_CG (1),INNER_MINRES (2), or INNER_CR (3)
+  logical :: cg_halo_shrink = .true. !< If true, CG uses halo-shrinking to defer pass_vector calls;
+                                     !! if false, uses fixed CG_action range with 1 pass_vector per iteration
   logical :: module_is_initialized = .false. !< True if this module has been initialized.
 
   !>@{ Diagnostic handles
@@ -483,6 +491,7 @@ subroutine initialize_ice_shelf_dyn(param_file, Time, ISS, CS, G, US, diag, new_
   integer :: i, j, isd, ied, jsd, jed, Isdq, Iedq, Jsdq, Jedq, iters
   character(len=200) :: IS_energyfile  ! The name of the energy file.
   character(len=32) :: filename_appendix = '' ! FMS appendix to filename for ensemble runs
+  character(len=16) :: inner_solver_str ! The type of inner solver to use for the SSA
 
   Isdq = G%isdB ; Iedq = G%iedB ; Jsdq = G%jsdB ; Jedq = G%jedB
   isd = G%isd ; ied = G%ied ; jsd = G%jsd ; jed = G%jed
@@ -599,6 +608,23 @@ subroutine initialize_ice_shelf_dyn(param_file, Time, ISS, CS, G, US, diag, new_
                 units="none", default=CS%nonlinear_tolerance)
     call get_param(param_file, mdl, "NEWTON_ADAPT_CG_TOL", CS%newton_adapt_cg_tol, &
                 "Use an adaptive CG tolerance during Newton iterations.", default=.true.)
+    call get_param(param_file, mdl, "ICE_SHELF_INNER_SOLVER", inner_solver_str, &
+                "Choice of inner linear solver for the ice-shelf SSA velocity system. "//&
+                "Valid choices are CG (default), CR, and MINRES.", &
+                default="CG")
+    select case (trim(inner_solver_str))
+      case ("CG")
+        CS%inner_solver = INNER_CG
+      case ("MINRES")
+        CS%inner_solver = INNER_MINRES
+      case ("CR")
+        CS%inner_solver = INNER_CR
+    end select
+    call get_param(param_file, mdl, "CG_HALO_SHRINK", CS%cg_halo_shrink, &
+                "If true, CG uses halo-shrinking to defer pass_vector calls. "//&
+                "If false, uses a fixed CG_action range with one pass_vector(D) per iteration, "//&
+                "which may reduce total communication for typical halo widths.", &
+                default=.true.)
     call get_param(param_file, mdl, "CONJUGATE_GRADIENT_MAXIT", CS%cg_max_iterations, &
                 "max iteratiions in CG solver", default=2000)
     call get_param(param_file, mdl, "THRESH_FLOAT_COL_DEPTH", CS%thresh_float_col_depth, &
@@ -715,7 +741,7 @@ subroutine initialize_ice_shelf_dyn(param_file, Time, ISS, CS, G, US, diag, new_
     allocate(CS%Jac(1:4,isd:ied,jsd:jed), source=0.0)
     do j=G%jsd,G%jed ; do i=G%isd,G%ied
       call bilinear_shape_fn_grid(G, i, j, CS%Phi(:,:,i,j), CS%Jac(:,i,j))
-    enddo; enddo
+    enddo ; enddo
 
     if (CS%GL_regularize) then
       allocate(CS%Phisub(2,2,CS%n_sub_regularize,CS%n_sub_regularize,2,2), source=0.0)
@@ -1636,7 +1662,6 @@ subroutine ice_shelf_solve_outer(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, i
 
   u_last(:,:) = u_shlf(:,:) ; v_last(:,:) = v_shlf(:,:)
   CS%cg_tol_newton = CS%cg_tolerance
-  ew_prev_err = err_init
 
   !! begin loop
 
@@ -1770,11 +1795,13 @@ subroutine ice_shelf_solve_outer(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, i
 
 end subroutine ice_shelf_solve_outer
 
+!> Unified inner linear solver for ice shelf velocity.
+!! Performs shared setup (RHS, preconditioner, initial matrix-vector product),
+!! dispatches to the selected Krylov method, and applies boundary conditions.
 subroutine ice_shelf_solve_inner(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, H_node, float_cond, &
                                  hmask, conv_flag, iters, time, Phi, Phisub)
   type(ice_shelf_dyn_CS), intent(in)    :: CS !< A pointer to the ice shelf control structure
-  type(ice_shelf_state),  intent(in)    :: ISS !< A structure with elements that describe
-                                           !! the ice-shelf state
+  type(ice_shelf_state),  intent(in)    :: ISS !< A structure with elements that describe the ice-shelf state
   type(ocean_grid_type),  intent(inout) :: G  !< The grid structure used by the ice shelf.
   type(unit_scale_type),  intent(in)    :: US !< A structure containing unit conversion factors
   real, dimension(SZDIB_(G),SZDJB_(G)), &
@@ -1786,76 +1813,46 @@ subroutine ice_shelf_solve_inner(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, H
   real, dimension(SZDIB_(G),SZDJB_(G)), &
                           intent(in)    :: taudy  !< The y-direction driving stress [R L3 Z T-2 ~> kg m s-2]
   real, dimension(SZDIB_(G),SZDJB_(G)), &
-                          intent(in)    :: H_node !< The ice shelf thickness at nodal (corner)
-                                             !! points [Z ~> m].
+                          intent(in)    :: H_node !< The ice shelf thickness at nodal (corner) points [Z ~> m].
   real, dimension(SZDI_(G),SZDJ_(G)), &
                           intent(in)    :: float_cond !< If GL_regularize=true, indicates cells containing
-                                                !! the grounding line (float_cond=1) or not (float_cond=0)
+                                                      !! the grounding line (float_cond=1) or not (float_cond=0)
   real, dimension(SZDI_(G),SZDJ_(G)), &
                           intent(in)    :: hmask !< A mask indicating which tracer points are
-                                             !! partly or fully covered by an ice-shelf
+                                                 !! partly or fully covered by an ice-shelf
   integer,                intent(out)   :: conv_flag !< A flag indicating whether (1) or not (0) the
-                                           !! iterations have converged to the specified tolerance
+                                                     !! iterations have converged to the specified tolerance
   integer,                intent(out)   :: iters !< The number of iterations used in the solver.
   type(time_type),        intent(in)    :: Time !< The current model time
   real, dimension(8,4,SZDI_(G),SZDJ_(G)), &
                           intent(in)    :: Phi !< The gradients of bilinear basis elements at Gaussian
-                                             !! quadrature points surrounding the cell vertices [L-1 ~> m-1].
+                                               !! quadrature points surrounding the cell vertices [L-1 ~> m-1].
   real, dimension(:,:,:,:,:,:), &
                           intent(in)    :: Phisub !< Quadrature structure weights at subgridscale
-                                            !! locations for finite element calculations [nondim]
-! one linear solve (nonlinear iteration) of the solution for velocity
-
-! in this subroutine:
-!    RHS = taud
-!    diagonal of matrix is found (for Jacobi precondition)
-!    CG iteration is carried out for max. iterations or until convergence
-
-! assumed - u, v, taud, visc, basal_traction are valid on the halo
-
-  real, dimension(SZDIB_(G),SZDJB_(G)) ::  &
-                        Ru, Rv, &     ! Residuals in the stress calculations [R L3 Z T-2 ~> m kg s-2]
-                        Ru_old, Rv_old, & ! Previous values of Ru and Rv [R L3 Z T-2 ~> m kg s-2]
-                        Zu, Zv, & ! Contributions to velocity changes [L T-1 ~> m s-1]
-                        Zu_old, Zv_old, & ! Previous values of Zu and Zv [L T-1 ~> m s-1]
-                        DIAGu, DIAGv, & ! Diagonals with units like Ru/Zu [R L2 Z T-1 ~> kg s-1]
-                        RHSu, RHSv, & ! Right hand side of the stress balance [R L3 Z T-2 ~> m kg s-2]
-                        Au, Av, & ! The retarding lateral stress contributions [R L3 Z T-2 ~> kg m s-2]
-                        Du, Dv, & ! Velocity changes [L T-1 ~> m s-1]
-                        sum_vec ! Sum of squares of residuals in stress calculations [m2 kg2 s-4]
-  real, dimension(SZDIB_(G),SZDJB_(G),3) :: sum_vec_3d ! Array used for various residuals
-                                                       ! sum_vec_3d(:,:,1:2) [m s-1] [m kg s-2]
-                                                       ! sum_vec_3d(:,:,3)   [m2 kg2 s-4]
-  real    :: beta_k      ! Ratio of residuals used to update search direction [nondim]
-  real    :: resid0tol2  ! Convergence tolerance times the initial residual [m2 kg2 s-4]
-  real    :: sv3dsum     ! An unused variable returned when taking global sum of residuals [various]
-  real    :: sv3dsums(3) ! The index-wise global sums of sum_vec_3d
-                         ! sv3dsums(:,:,1:2) [m s-1] [m kg s-2]
-                         ! sv3dsums(:,:,3)   [m2 kg2 s-4]
-  real    :: alpha_k     ! A scaling factor for iterative corrections [nondim]
-  real    :: resid_scale ! A scaling factor for redimensionalizing the global residuals
-                         ! [L T-1 ~> m s-1] [R L3 Z T-2 ~> m kg s-2]
-  real    :: resid2_scale ! A scaling factor for redimensionalizing the global squared residuals
-                         ! [R2 L6 Z2 T-4 ~> m2 kg2 s-4]
-  real    :: rhoi_rhow  ! The density of ice divided by a typical water density [nondim]
-  integer :: cg_halo     ! Number of halo vertices to include during a CG iteration
-  integer :: max_cg_halo ! Maximum possible number of halo vertices to include in the CG iterations
-  integer :: iter, i, j, isd, ied, jsd, jed, isc, iec, jsc, jec, is, js, ie, je
+                                                  !! locations for finite element calculations [nondim]
+
+  real, dimension(SZDIB_(G),SZDJB_(G)) :: &
+        RHSu, RHSv, &      ! Right hand side of the stress balance [R L3 Z T-2 ~> m kg s-2]
+        Au, Av, &          ! Matrix-vector product A*x [R L3 Z T-2 ~> kg m s-2]
+        DIAGu, DIAGv, &    ! Diagonals [R L2 Z T-1 ~> kg s-1]
+        IDIAGu, IDIAGv     ! Reciprocal diagonals [R-1 L-2 Z-1 T ~> kg-1 s]
+  real    :: rhoi_rhow     ! The density of ice divided by a typical water density [nondim]
+  real    :: resid_scale   ! A scaling factor for redimensionalizing the global residuals
+                           ! [T3 kg m2 R-1 Z-1 L-4 s-3 ~> 1]
   integer :: Is_sum, Js_sum, Ie_sum, Je_sum ! Loop bounds for global sums or arrays starting at 1.
-  integer :: Isdq, Iedq, Jsdq, Jedq, Iscq, Iecq, Jscq, Jecq, nx_halo, ny_halo
-  integer :: Iscq_sv, Jscq_sv ! Starting loop bound for sum_vec
+  integer :: Iscq_sv, Jscq_sv ! Starting loop bound for sum_vec arrays
+  integer :: I, J
+  integer :: Isdq, Iedq, Jsdq, Jedq, Iscq, Iecq, Jscq, Jecq
+  integer :: isc, iec, jsc, jec
 
   Isdq = G%IsdB ; Iedq = G%IedB ; Jsdq = G%JsdB ; Jedq = G%JedB
   Iscq = G%IscB ; Iecq = G%IecB ; Jscq = G%JscB ; Jecq = G%JecB
-  ny_halo = G%domain%njhalo ; nx_halo = G%domain%nihalo
-  isd = G%isd ; ied = G%ied ; jsd = G%jsd ; jed = G%jed
   isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec
 
   rhoi_rhow = CS%density_ice / CS%density_ocean_avg
 
-  Zu(:,:) = 0 ; Zv(:,:) = 0 ; DIAGu(:,:) = 0 ; DIAGv(:,:) = 0
-  Ru(:,:) = 0 ; Rv(:,:) = 0 ; Au(:,:) = 0 ; Av(:,:) = 0 ; RHSu(:,:) = 0 ; RHSv(:,:) = 0
-  Du(:,:) = 0 ; Dv(:,:) = 0
+  ! Initialize shared arrays
+  Au(:,:) = 0 ; Av(:,:) = 0 ; DIAGu(:,:) = 0 ; DIAGv(:,:) = 0
 
   ! Determine the loop limits for sums, bearing in mind that the arrays will be starting at 1.
   ! Includes the edge of the tile is at the western/southern bdry (if symmetric)
@@ -1872,39 +1869,175 @@ subroutine ice_shelf_solve_inner(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, H
   Ie_sum = Iecq + (1-Isdq) ; Je_sum = Jecq + (1-Jsdq)
 
   RHSu(:,:) = taudx(:,:) ; RHSv(:,:) = taudy(:,:)
-
   call pass_vector(RHSu, RHSv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
 
   call matrix_diagonal(CS, G, US, float_cond, H_node, CS%ice_visc, CS%basal_traction, &
                        hmask, rhoi_rhow, Phi, Phisub, DIAGu, DIAGv)
-
   call pass_vector(DIAGu, DIAGv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
 
   call CG_action(CS, Au, Av, u_shlf, v_shlf, Phi, Phisub, CS%umask, CS%vmask, hmask, &
                  H_node, CS%ice_visc, float_cond, CS%bed_elev, CS%basal_traction, &
                  G, US, isc-1, iec+1, jsc-1, jec+1, rhoi_rhow, use_newton_in=.false.)
-
   call pass_vector(Au, Av, G%domain, TO_ALL, BGRID_NE, complete=.true.)
 
-  Ru(:,:) = (RHSu(:,:) - Au(:,:)) ; Rv(:,:) = (RHSv(:,:) - Av(:,:))
+  ! Precompute reciprocal diagonal
+  IDIAGu(:,:) = 0.0 ; IDIAGv(:,:) = 0.0
+  do J=Jsdq,Jedq ; do I=Isdq,Iedq
+    if (CS%umask(I,J)==1 .AND. DIAGu(I,J)/=0) IDIAGu(I,J) = 1.0 / DIAGu(I,J)
+    if (CS%vmask(I,J)==1 .AND. DIAGv(I,J)/=0) IDIAGv(I,J) = 1.0 / DIAGv(I,J)
+  enddo ; enddo
+
   resid_scale = US%s_to_T*(US%RZL2_to_kg*US%L_T_to_m_s**2)
-  resid2_scale = ((US%RZ_to_kg_m2*US%L_to_m)*US%L_T_to_m_s**2)**2
 
-  sum_vec(:,:) = 0.0
-  do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
-    if (CS%umask(I,J) == 1) sum_vec(I,J) = resid2_scale*Ru(I,J)**2
-    if (CS%vmask(I,J) == 1) sum_vec(I,J) = sum_vec(I,J) + resid2_scale*Rv(I,J)**2
+  ! Dispatch to selected solver
+  select case (CS%inner_solver)
+    case (INNER_CG)
+      call ice_shelf_solve_inner_CG(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                    IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                    rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                    Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+    case (INNER_MINRES)
+      call ice_shelf_solve_inner_MINRES(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                        IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                        rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                        Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+    case (INNER_CR)
+      call ice_shelf_solve_inner_CR(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                    IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                    rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                    Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+  end select
+
+  ! Shared teardown: Apply boundary conditions
+  do J=Jsdq,Jedq ; do I=Isdq,Iedq
+      if (CS%umask(I,J) == 3) then
+        u_shlf(I,J) = CS%u_bdry_val(I,J)
+      elseif (CS%umask(I,J) == 0) then
+        u_shlf(I,J) = 0
+      endif
+
+      if (CS%vmask(I,J) == 3) then
+        v_shlf(I,J) = CS%v_bdry_val(I,J)
+      elseif (CS%vmask(I,J) == 0) then
+        v_shlf(I,J) = 0
+      endif
   enddo ; enddo
 
-  !resid0 = sqrt(reproducing_sum( sum_vec, Is_sum, Ie_sum, Js_sum, Je_sum ))
-  resid0tol2 = CS%cg_tol_newton**2 * reproducing_sum( sum_vec, Is_sum, Ie_sum, Js_sum, Je_sum )
+  call pass_vector(u_shlf, v_shlf, G%domain, TO_ALL, BGRID_NE)
+
+  if (conv_flag == 0) then
+    iters = CS%cg_max_iterations
+  endif
+
+end subroutine ice_shelf_solve_inner
+
+!> CG (Conjugate Gradient) inner Krylov solve for ice shelf velocity.
+subroutine ice_shelf_solve_inner_CG(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                     IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                     rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                     Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+  type(ice_shelf_dyn_CS), intent(in)    :: CS !< A pointer to the ice shelf control structure
+  type(ocean_grid_type),  intent(inout) :: G  !< The grid structure used by the ice shelf.
+  type(unit_scale_type),  intent(in)    :: US !< A structure containing unit conversion factors
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: u_shlf  !< The zonal ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: v_shlf  !< The meridional ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSu !< Right hand side, x [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSv !< Right hand side, y [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Au !< Matrix-vector product workspace, x [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Av !< Matrix-vector product workspace, y [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGu !< Reciprocal Jacobi diagonal, x [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGv !< Reciprocal Jacobi diagonal, y [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: H_node !< The ice shelf thickness at nodal points [Z ~> m]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: float_cond !< Grounding line indicator [nondim]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: hmask !< Ice shelf coverage mask
+  real,                   intent(in)    :: rhoi_rhow !< Ice-to-ocean density ratio [nondim]
+  real,                   intent(in)    :: resid_scale !< Scaling for inner products
+                                                       !! [T3 kg m2 R-1 Z-1 L-4 s-3 ~> 1]
+  real, dimension(8,4,SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: Phi !< Basis element gradients at quadrature points [L-1 ~> m-1]
+  real, dimension(:,:,:,:,:,:), &
+                          intent(in)    :: Phisub !< Subgridscale quadrature weights [nondim]
+  integer,                intent(out)   :: conv_flag !< Convergence flag: 1=converged, 0=not
+  integer,                intent(out)   :: iters !< The number of iterations used
+  integer,                intent(in)    :: Is_sum !< Starting i-index for global sums
+  integer,                intent(in)    :: Js_sum !< Starting j-index for global sums
+  integer,                intent(in)    :: Ie_sum !< Ending i-index for global sums
+  integer,                intent(in)    :: Je_sum !< Ending j-index for global sums
+  integer,                intent(in)    :: Iscq_sv !< Starting i-index for sum_vec arrays
+  integer,                intent(in)    :: Jscq_sv !< Starting j-index for sum_vec arrays
+
+  real, dimension(SZDIB_(G),SZDJB_(G)) ::  &
+                        Ru, Rv, &     ! Residuals [R L3 Z T-2 ~> m kg s-2]
+                        Zu, Zv, &     ! Preconditioned residuals [L T-1 ~> m s-1]
+                        Du, Dv        ! Search directions [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)) :: sum_vec ! Pointwise D·A products for the alpha_k global sum
+                                                  ! [kg m2 s-3]
+  real, dimension(SZDIB_(G),SZDJB_(G),2) :: sum_vec_3d ! Array used for various residuals
+                                                       ! sum_vec_3d(:,:,1) [kg m2 s-3]
+                                                       ! sum_vec_3d(:,:,2) [kg2 m2 s-4]
+  real    :: beta_k      ! Ratio of residuals used to update search direction [nondim]
+  real    :: resid0tol2  ! Convergence tolerance times the initial residual [m2 kg2 s-4]
+  real    :: sv3dsum     ! An unused variable returned when taking global sum of residuals [various]
+  real    :: sv3dsums(2) ! The index-wise global sums of sum_vec_3d
+                         ! sv3dsums(1) [kg m2 s-3]
+                         ! sv3dsums(2) [kg2 m2 s-4]
+  real    :: alpha_k     ! A scaling factor for iterative corrections [nondim]
+  real    :: rho_old     ! The preconditioned residual inner product Z·R from the previous CG
+                         ! iteration, scaled by resid_scale [kg m2 s-3]
+  real    :: resid2_scale ! A scaling factor for redimensionalizing the global squared residuals
+                          ! [T4 kg2 m2 R-2 Z-2 L-6 s-4 ~> 1]
+  integer :: cg_halo     ! Number of halo vertices to include during a CG iteration
+  integer :: max_cg_halo ! Maximum possible number of halo vertices to include in the CG iterations
+  integer :: iter, i, j, isc, iec, jsc, jec, is, js, ie, je, is2, ie2, js2, je2
+  integer :: Isdq, Iedq, Jsdq, Jedq, Iscq, Iecq, Jscq, Jecq, nx_halo, ny_halo
+
+  Isdq = G%IsdB ; Iedq = G%IedB ; Jsdq = G%JsdB ; Jedq = G%JedB
+  Iscq = G%IscB ; Iecq = G%IecB ; Jscq = G%JscB ; Jecq = G%JecB
+  ny_halo = G%domain%njhalo ; nx_halo = G%domain%nihalo
+  isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec
+
+  resid2_scale = ((US%RZ_to_kg_m2*US%L_to_m)*US%L_T_to_m_s**2)**2
+
+  Ru(:,:) = 0 ; Rv(:,:) = 0 ; Zu(:,:) = 0 ; Zv(:,:) = 0 ; Du(:,:) = 0 ; Dv(:,:) = 0
+
+  Ru(:,:) = (RHSu(:,:) - Au(:,:)) ; Rv(:,:) = (RHSv(:,:) - Av(:,:))
 
   do J=Jsdq,Jedq ; do I=Isdq,Iedq
-    if (CS%umask(I,J) == 1 .AND.(DIAGu(I,J)/=0)) Zu(I,J) = Ru(I,J) / DIAGu(I,J)
-    if (CS%vmask(I,J) == 1 .AND.(DIAGv(I,J)/=0)) Zv(I,J) = Rv(I,J) / DIAGv(I,J)
+    if (CS%umask(I,J) == 1) Zu(I,J) = Ru(I,J) * IDIAGu(I,J)
+    if (CS%vmask(I,J) == 1) Zv(I,J) = Rv(I,J) * IDIAGv(I,J)
+    Du(I,J) = Zu(I,J)
+    Dv(I,J) = Zv(I,J)
   enddo ; enddo
 
-  Du(:,:) = Zu(:,:) ; Dv(:,:) = Zv(:,:)
+  ! Compute rho_old = Z·R and resid0tol2 before the CG loop
+  sum_vec_3d(:,:,:) = 0.0
+  do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+    if (CS%umask(I,J) == 1) then
+      sum_vec_3d(I,J,1) = resid_scale  * (Zu(I,J) * Ru(I,J))
+      sum_vec_3d(I,J,2) = resid2_scale * Ru(I,J)**2
+    endif
+    if (CS%vmask(I,J) == 1) then
+      sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid_scale  * (Zv(I,J) * Rv(I,J))
+      sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid2_scale * Rv(I,J)**2
+    endif
+  enddo ; enddo
+
+  sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
+
+  rho_old = sv3dsums(1)
+  !resid0 = sqrt(sv3dsums(2))
+  resid0tol2 = CS%cg_tol_newton**2 * sv3dsums(2)
 
   if (G%symmetric) then
     max_cg_halo=min(nx_halo,ny_halo)
@@ -1914,133 +2047,568 @@ subroutine ice_shelf_solve_inner(CS, ISS, G, US, u_shlf, v_shlf, taudx, taudy, H
   cg_halo = max_cg_halo
   conv_flag = 0
 
+  if (CS%cg_halo_shrink) then
+    is = isc - cg_halo ; ie = Iecq + cg_halo
+    js = jsc - cg_halo ; je = Jecq + cg_halo
+    is2 = is ; ie2 = ie-1
+    js2 = js ; je2 = je-1
+  else
+    is = isc - 1 ; ie = iec + 1
+    js = jsc - 1 ; je = jec + 1
+    is2 = Iscq ; ie2 = Iecq
+    js2 = Jscq ; je2 = Jecq
+  endif
+
   !!!!!!!!!!!!!!!!!!
   !!              !!
   !! MAIN CG LOOP !!
   !!              !!
   !!!!!!!!!!!!!!!!!!
 
-  ! initially, c-grid data is valid up to 3 halo nodes out
-
   do iter = 1,CS%cg_max_iterations
 
-    ! we can never assume that any arrays are legit more than 3 vertices past
-    ! the computational domain - this is their state in the initial iteration
-
-    is = isc - cg_halo ; ie = Iecq + cg_halo
-    js = jsc - cg_halo ; je = Jecq + cg_halo
-
     Au(:,:) = 0 ; Av(:,:) = 0
 
     call CG_action(CS, Au, Av, Du, Dv, Phi, Phisub, CS%umask, CS%vmask, hmask, &
                    H_node, CS%ice_visc, float_cond, CS%bed_elev, CS%basal_traction, &
                    G, US, is, ie, js, je, rhoi_rhow)
 
-    ! Au, Av valid region moves in by 1
-
-    call pass_vector(Au,Av,G%domain, TO_ALL, BGRID_NE)
-
-    sum_vec_3d(:,:,1:2) = 0.0
+    sum_vec(:,:) = 0.0
 
     do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
-      if (CS%umask(I,J) == 1) then
-        sum_vec_3d(I,J,1) = resid_scale * (Zu(I,J) * Ru(I,J))
-        sum_vec_3d(I,J,2) = resid_scale * (Du(I,J) * Au(I,J))
-        Ru_old(I,J) = Ru(I,J) ; Zu_old(I,J) = Zu(I,J)
-      endif
-      if (CS%vmask(I,J) == 1) then
-        sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid_scale * (Zv(I,J) * Rv(I,J))
-        sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid_scale * (Dv(I,J) * Av(I,J))
-        Rv_old(I,J) = Rv(I,J) ; Zv_old(I,J) = Zv(I,J)
-      endif
+      if (CS%umask(I,J) == 1) sum_vec(I,J) = resid_scale * (Du(I,J) * Au(I,J))
+      if (CS%vmask(I,J) == 1) sum_vec(I,J) = sum_vec(I,J) + resid_scale * (Dv(I,J) * Av(I,J))
     enddo ; enddo
 
-    sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
+    sv3dsum = reproducing_sum( sum_vec(:,:), Is_sum, Ie_sum, Js_sum, Je_sum )
 
-    alpha_k = sv3dsums(1)/sv3dsums(2)
+    if (sv3dsum == 0.0) then
+      iters = iter
+      conv_flag = 1
+      exit
+    endif
+
+    alpha_k = rho_old / sv3dsum
 
-    do J=js,je-1 ; do I=is,ie-1
+    do J=js2,je2 ; do I=is2,ie2
       if (CS%umask(I,J) == 1) then
         u_shlf(I,J) = u_shlf(I,J) + alpha_k * Du(I,J)
         Ru(I,J) = Ru(I,J) - alpha_k * Au(I,J)
-        if (DIAGu(I,J)/=0) Zu(I,J) = Ru(I,J) / DIAGu(I,J)
+        Zu(I,J) = Ru(I,J) * IDIAGu(I,J)
       endif
       if (CS%vmask(I,J) == 1) then
         v_shlf(I,J) = v_shlf(I,J) + alpha_k * Dv(I,J)
         Rv(I,J) = Rv(I,J) - alpha_k * Av(I,J)
-        if (DIAGv(I,J)/=0) Zv(I,J) = Rv(I,J) / DIAGv(I,J)
+        Zv(I,J) = Rv(I,J) * IDIAGv(I,J)
       endif
     enddo ; enddo
 
-
-    ! R,u,v,Z valid region moves in by 1
-
-    ! beta_k = (Z \dot R) / (Zold \dot Rold)
-    sum_vec_3d(:,:,:) = 0.0 ; sv3dsums(:) = 0.0
+    ! beta_k = (Z \dot R) / (Z_prev \dot R_prev)
+    sum_vec_3d(:,:,:) = 0.0 ; sv3dsums(:)=0.0
 
     do J=jscq_sv,jecq ; do i=iscq_sv,iecq
       if (CS%umask(I,J) == 1) then
         sum_vec_3d(I,J,1) = resid_scale  * (Zu(I,J) * Ru(I,J))
-        sum_vec_3d(I,J,2) = resid_scale  * (Zu_old(I,J) * Ru_old(I,J))
-        sum_vec_3d(I,J,3) = resid2_scale * Ru(I,J)**2
+        sum_vec_3d(I,J,2) = resid2_scale * Ru(I,J)**2
       endif
       if (CS%vmask(I,J) == 1) then
         sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid_scale  * (Zv(I,J) * Rv(I,J))
-        sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid_scale  * (Zv_old(I,J) * Rv_old(I,J))
-        sum_vec_3d(I,J,3) = sum_vec_3d(I,J,3) + resid2_scale * Rv(I,J)**2
+        sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid2_scale * Rv(I,J)**2
       endif
     enddo ; enddo
 
-    sv3dsum = reproducing_sum( sum_vec_3d, Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums )
+    sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
+
+    beta_k = sv3dsums(1) / rho_old
+
+    if (sv3dsums(2) <= resid0tol2) then
+      iters = iter
+      conv_flag = 1
+      exit
+    endif
+
+    do J=js2,je2 ; do I=is2,ie2
+      if (CS%umask(I,J) == 1) Du(I,J) = Zu(I,J) + beta_k * Du(I,J)
+      if (CS%vmask(I,J) == 1) Dv(I,J) = Zv(I,J) + beta_k * Dv(I,J)
+    enddo ; enddo
+
+    rho_old = sv3dsums(1)
+
+    if (CS%cg_halo_shrink) then
+      cg_halo = cg_halo - 1
+      if (cg_halo == 0) then
+        call pass_vector(Du, Dv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
+        call pass_vector(Zu, Zv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
+        call pass_vector(Ru, Rv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
+        call pass_vector(u_shlf, v_shlf, G%domain, TO_ALL, BGRID_NE, complete=.true.)
+        cg_halo = max_cg_halo
+      endif
+      is = isc - cg_halo ; ie = Iecq + cg_halo
+      js = jsc - cg_halo ; je = Jecq + cg_halo
+      is2 = is ; ie2 = ie-1
+      js2 = js ; je2 = je-1
+    else
+      call pass_vector(Du, Dv, G%domain, TO_ALL, BGRID_NE)
+    endif
+
+  enddo ! end of CG loop
+
+end subroutine ice_shelf_solve_inner_CG
+
+!> MINRES inner Krylov solve for ice shelf velocity.
+subroutine ice_shelf_solve_inner_MINRES(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                         IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                         rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                         Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+  type(ice_shelf_dyn_CS), intent(in)    :: CS !< A pointer to the ice shelf control structure
+  type(ocean_grid_type),  intent(inout) :: G  !< The grid structure used by the ice shelf.
+  type(unit_scale_type),  intent(in)    :: US !< A structure containing unit conversion factors
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: u_shlf  !< The zonal ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: v_shlf  !< The meridional ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSu !< Right hand side, x [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSv !< Right hand side, y [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Au !< Matrix-vector product workspace, x [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Av !< Matrix-vector product workspace, y [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGu !< Reciprocal Jacobi diagonal, x [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGv !< Reciprocal Jacobi diagonal, y [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: H_node !< The ice shelf thickness at nodal points [Z ~> m]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: float_cond !< Grounding line indicator [nondim]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: hmask !< Ice shelf coverage mask
+  real,                   intent(in)    :: rhoi_rhow !< Ice-to-ocean density ratio [nondim]
+  real,                   intent(in)    :: resid_scale !< Scaling for inner products
+                                                       !! [T3 kg m2 R-1 Z-1 L-4 s-3 ~> 1]
+  real, dimension(8,4,SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: Phi !< Basis element gradients at quadrature points [L-1 ~> m-1]
+  real, dimension(:,:,:,:,:,:), &
+                          intent(in)    :: Phisub !< Subgridscale quadrature weights [nondim]
+  integer,                intent(out)   :: conv_flag !< Convergence flag: 1=converged, 0=not
+  integer,                intent(out)   :: iters !< The number of iterations used
+  integer,                intent(in)    :: Is_sum !< Starting i-index for global sums
+  integer,                intent(in)    :: Js_sum !< Starting j-index for global sums
+  integer,                intent(in)    :: Ie_sum !< Ending i-index for global sums
+  integer,                intent(in)    :: Je_sum !< Ending j-index for global sums
+  integer,                intent(in)    :: Iscq_sv !< Starting i-index for sum_vec arrays
+  integer,                intent(in)    :: Jscq_sv !< Starting j-index for sum_vec arrays
+
+  real, dimension(SZDIB_(G),SZDJB_(G)) ::  &
+        V_old_u, V_old_v, V_curr_u, V_curr_v, V_new_u, V_new_v, & ! Lanczos basis vectors [R L3 Z T-2 ~> m kg s-2]
+        Z_curr_u, Z_curr_v, Z_new_u, Z_new_v, &    ! Preconditioned Lanczos vectors [L T-1 ~> m s-1]
+        W_old_u, W_old_v, W_curr_u, W_curr_v, W_new_u, W_new_v, & ! MINRES search directions [L T-1 ~> m s-1]
+        Qu, Qv            ! A * Z_curr [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)) :: sum_vec_3d ! Pointwise products for global sums
+                                                     ! [kg m2 s-3] before normalization;
+                                                     ! [nondim] inside loop (after Lanczos normalization)
+  real    :: alpha       ! Lanczos diagonal element (Rayleigh quotient) [nondim]
+  real    :: beta1       ! Current Lanczos off-diagonal coefficient;
+                         ! initial value [kg^1/2 m s^-3/2], then [nondim] after iter 1
+  real    :: beta2       ! Next Lanczos off-diagonal coefficient [nondim]
+  real    :: eta         ! MINRES residual norm estimate [kg^1/2 m s^-3/2]
+  real    :: eta_curr    ! Effective step magnitude for current iteration [kg^1/2 m s^-3/2]
+  real    :: c0, s0, c1, s1, c2, s2  ! Givens rotation cosines and sines [nondim]
+  real    :: d0, d1, d2  ! Tridiagonal QR factorization coefficients [nondim]
+  real    :: resid0tol   ! Convergence tolerance (CS%cg_tol_newton * beta1) [kg^1/2 m s^-3/2]
+  real    :: current_norm ! Current MINRES residual norm estimate [kg^1/2 m s^-3/2]
+  real    :: sv3dsum     ! Global reproducing sum of sum_vec_3d;
+                         ! [kg m2 s-3] before normalization, [nondim] inside loop
+  real    :: Ibeta1      ! Reciprocal of initial beta1 [kg^-1/2 m-1 s^3/2]
+  real    :: Ibeta2      ! Reciprocal of beta2 [nondim]
+  real    :: Id1         ! Reciprocal of d1 [nondim]
+  integer :: iter, i, j, isc, iec, jsc, jec
+  integer :: Isdq, Iedq, Jsdq, Jedq, Iscq, Iecq, Jscq, Jecq
+
+  Isdq = G%IsdB ; Iedq = G%IedB ; Jsdq = G%JsdB ; Jedq = G%JedB
+  Iscq = G%IscB ; Iecq = G%IecB ; Jscq = G%JscB ; Jecq = G%JecB
+  isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec
+
+  ! Initialize MINRES-specific arrays
+  V_old_u(:,:) = 0 ; V_old_v(:,:) = 0 ; V_curr_u(:,:) = 0 ; V_curr_v(:,:) = 0
+  Z_curr_u(:,:) = 0 ; Z_curr_v(:,:) = 0
+  W_old_u(:,:) = 0 ; W_old_v(:,:) = 0 ; W_curr_u(:,:) = 0 ; W_curr_v(:,:) = 0
+  Qu(:,:) = 0 ; Qv(:,:) = 0
+
+  ! Initial Residual
+  V_curr_u(:,:) = (RHSu(:,:) - Au(:,:)) ; V_curr_v(:,:) = (RHSv(:,:) - Av(:,:))
+
+  do J=Jscq,Jecq ; do I=Iscq,Iecq
+     if (CS%umask(I,J) == 1) Z_curr_u(I,J) = V_curr_u(I,J) * IDIAGu(I,J)
+     if (CS%vmask(I,J) == 1) Z_curr_v(I,J) = V_curr_v(I,J) * IDIAGv(I,J)
+  enddo ; enddo
+
+  sum_vec_3d(:,:) = 0.0
+  do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+    if (CS%umask(I,J) == 1) sum_vec_3d(I,J) = resid_scale * (V_curr_u(I,J) * Z_curr_u(I,J))
+    if (CS%vmask(I,J) == 1) sum_vec_3d(I,J) = sum_vec_3d(I,J) + resid_scale * (V_curr_v(I,J) * Z_curr_v(I,J))
+  enddo ; enddo
+  sv3dsum = reproducing_sum( sum_vec_3d(:,:), Is_sum, Ie_sum, Js_sum, Je_sum )
+
+  beta1 = sqrt(abs(sv3dsum))
+
+  if (beta1 == 0.0) then
+     conv_flag = 1
+     iters = 0
+     return
+  endif
+
+  Ibeta1 = 1.0/beta1
+
+  ! Normalize initial Lanczos vectors
+  do J=Jscq,Jecq ; do I=Iscq,Iecq
+     if (CS%umask(I,J) == 1) then
+         V_curr_u(I,J) = V_curr_u(I,J) * Ibeta1
+         Z_curr_u(I,J) = Z_curr_u(I,J) * Ibeta1
+     endif
+     if (CS%vmask(I,J) == 1) then
+         V_curr_v(I,J) = V_curr_v(I,J) * Ibeta1
+         Z_curr_v(I,J) = Z_curr_v(I,J) * Ibeta1
+     endif
+  enddo ; enddo
+
+  ! Sync Z_curr prior to entering the loop
+  call pass_vector(Z_curr_u, Z_curr_v, G%domain, TO_ALL, BGRID_NE)
+
+  eta = beta1
+  resid0tol = CS%cg_tol_newton * beta1
+  conv_flag = 0
+
+  c0 = 1.0 ; s0 = 0.0 ; c1 = 1.0 ; s1 = 0.0
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  !!                                !!
+  !! MAIN MINRES LANCZOS LOOP       !!
+  !!                                !!
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-    beta_k = sv3dsums(1)/sv3dsums(2)
+  do iter = 1, CS%cg_max_iterations
 
-    do J=js,je-1 ; do I=is,ie-1
-        if (CS%umask(I,J) == 1) Du(I,J) = Zu(I,J) + beta_k * Du(I,J)
-        if (CS%vmask(I,J) == 1) Dv(I,J) = Zv(I,J) + beta_k * Dv(I,J)
+    ! --- STEP 1: Matrix Vector Product ---
+    Qu(:,:) = 0 ; Qv(:,:) = 0
+    call CG_action(CS, Qu, Qv, Z_curr_u, Z_curr_v, Phi, Phisub, CS%umask, CS%vmask, hmask, &
+                   H_node, CS%ice_visc, float_cond, CS%bed_elev, CS%basal_traction, &
+                   G, US, isc-1, iec+1, jsc-1, jec+1, rhoi_rhow)
+    ! --- STEP 2: alpha = q dot z_curr ---
+    sum_vec_3d(:,:) = 0.0
+    do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+       if (CS%umask(I,J) == 1) sum_vec_3d(I,J) = resid_scale * (Qu(I,J) * Z_curr_u(I,J))
+       if (CS%vmask(I,J) == 1) sum_vec_3d(I,J) = sum_vec_3d(I,J) + resid_scale * (Qv(I,J) * Z_curr_v(I,J))
+    enddo ; enddo
+    sv3dsum = reproducing_sum( sum_vec_3d(:,:), Is_sum, Ie_sum, Js_sum, Je_sum )
+    alpha = sv3dsum
+
+    ! --- FUSED STEPS 3 & 4: Update V_new and Precondition to Z_new ---
+    do J=Jscq,Jecq ; do I=Iscq,Iecq
+       if (CS%umask(I,J) == 1) then
+           V_new_u(I,J) = Qu(I,J) - alpha * V_curr_u(I,J) - beta1 * V_old_u(I,J)
+           Z_new_u(I,J) = V_new_u(I,J) * IDIAGu(I,J)
+       endif
+       if (CS%vmask(I,J) == 1) then
+           V_new_v(I,J) = Qv(I,J) - alpha * V_curr_v(I,J) - beta1 * V_old_v(I,J)
+           Z_new_v(I,J) = V_new_v(I,J) * IDIAGv(I,J)
+       endif
+    enddo ; enddo
+
+    ! --- STEP 5: beta2 = sqrt(v_new dot z_new) ---
+    sum_vec_3d(:,:) = 0.0
+    do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+       if (CS%umask(I,J) == 1) sum_vec_3d(I,J) = resid_scale * (V_new_u(I,J) * Z_new_u(I,J))
+       if (CS%vmask(I,J) == 1) sum_vec_3d(I,J) = sum_vec_3d(I,J) + resid_scale * (V_new_v(I,J) * Z_new_v(I,J))
     enddo ; enddo
+    sv3dsum = reproducing_sum( sum_vec_3d(:,:), Is_sum, Ie_sum, Js_sum, Je_sum )
+    beta2 = sqrt(abs(sv3dsum))
 
-   ! D valid region moves in by 1
+    ! --- STEP 6: Apply Givens Rotations ---
+    d0 = c1 * alpha - c0 * s1 * beta1
+    d1 = sqrt(d0**2 + beta2**2)
 
-    !if sqrt(sv3dsums(3)) <= (CS%cg_tolerance * resid0)
-    if (sv3dsums(3) <= resid0tol2) then
+    if (d1 == 0.0) then
       iters = iter
       conv_flag = 1
       exit
     endif
 
-    cg_halo = cg_halo - 1
+    Id1 = 1.0 / d1
+    if (beta2 > 0) Ibeta2 = 1.0 / beta2
+
+    d2 = s1 * alpha + c0 * c1 * beta1
+    c2 = d0 * Id1
+    s2 = beta2 * Id1
+
+    eta_curr = c2 * eta
+    eta = -s2 * eta
+    current_norm = abs(eta)
+
+    ! --- FUSED STEPS 7 & 9: Update u/v, Check Convergence, and Shift Vectors ---
+    do J=Jscq,Jecq ; do I=Iscq,Iecq
+       if (CS%umask(I,J) == 1) then
+           W_new_u(I,J) = (Z_curr_u(I,J) - (d2 * W_curr_u(I,J) + beta1 * s0 * W_old_u(I,J))) * Id1
+           u_shlf(I,J) = u_shlf(I,J) + eta_curr * W_new_u(I,J)
+           if (beta2 > 0.0) then
+               V_old_u(I,J) = V_curr_u(I,J)
+               V_curr_u(I,J) = V_new_u(I,J) * Ibeta2
+               Z_curr_u(I,J) = Z_new_u(I,J) * Ibeta2
+               W_old_u(I,J) = W_curr_u(I,J)
+               W_curr_u(I,J) = W_new_u(I,J)
+           endif
+       endif
+       if (CS%vmask(I,J) == 1) then
+           W_new_v(I,J) = (Z_curr_v(I,J) - (d2 * W_curr_v(I,J) + beta1 * s0 * W_old_v(I,J))) * Id1
+           v_shlf(I,J) = v_shlf(I,J) + eta_curr * W_new_v(I,J)
+           if (beta2 > 0.0) then
+               V_old_v(I,J) = V_curr_v(I,J)
+               V_curr_v(I,J) = V_new_v(I,J) * Ibeta2
+               Z_curr_v(I,J) = Z_new_v(I,J) * Ibeta2
+               W_old_v(I,J) = W_curr_v(I,J)
+               W_curr_v(I,J) = W_new_v(I,J)
+           endif
+       endif
+    enddo ; enddo
 
-    if (cg_halo == 0) then
-     ! pass vectors
-      call pass_vector(Du, Dv, G%domain, TO_ALL, BGRID_NE, complete=.false.)
-      call pass_vector(u_shlf, v_shlf, G%domain, TO_ALL, BGRID_NE, complete=.false.)
-      call pass_vector(Ru, Rv, G%domain, TO_ALL, BGRID_NE, complete=.true.)
-      cg_halo = max_cg_halo
+    ! --- STEP 8: Check Convergence ---
+    if (current_norm <= resid0tol .or. beta2 == 0.0) then
+      iters = iter
+      conv_flag = 1
+      exit
     endif
 
-  enddo ! end of CG loop
+    ! Sync Z_curr for the next iteration's CG_action
+    call pass_vector(Z_curr_u, Z_curr_v, G%domain, TO_ALL, BGRID_NE)
 
+    beta1 = beta2
+    c0 = c1 ; c1 = c2
+    s0 = s1 ; s1 = s2
+
+  enddo ! end of MINRES loop
+
+end subroutine ice_shelf_solve_inner_MINRES
+
+!> CR (Conjugate Residual) inner Krylov solve for ice shelf velocity.
+subroutine ice_shelf_solve_inner_CR(CS, G, US, u_shlf, v_shlf, RHSu, RHSv, Au, Av, &
+                                     IDIAGu, IDIAGv, H_node, float_cond, hmask, &
+                                     rhoi_rhow, resid_scale, Phi, Phisub, conv_flag, iters, &
+                                     Is_sum, Js_sum, Ie_sum, Je_sum, Iscq_sv, Jscq_sv)
+  type(ice_shelf_dyn_CS), intent(in)    :: CS !< A pointer to the ice shelf control structure
+  type(ocean_grid_type),  intent(inout) :: G  !< The grid structure used by the ice shelf.
+  type(unit_scale_type),  intent(in)    :: US !< A structure containing unit conversion factors
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: u_shlf  !< The zonal ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: v_shlf  !< The meridional ice shelf velocity [L T-1 ~> m s-1]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSu !< Right hand side, x [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: RHSv !< Right hand side, y [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Au !< Matrix-vector product workspace, x [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(inout) :: Av !< Matrix-vector product workspace, y [R L3 Z T-2 ~> kg m s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGu !< Reciprocal Jacobi diagonal, x [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: IDIAGv !< Reciprocal Jacobi diagonal, y [R-1 L-2 Z-1 T ~> kg-1 s]
+  real, dimension(SZDIB_(G),SZDJB_(G)), &
+                          intent(in)    :: H_node !< The ice shelf thickness at nodal points [Z ~> m]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: float_cond !< Grounding line indicator [nondim]
+  real, dimension(SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: hmask !< Ice shelf coverage mask
+  real,                   intent(in)    :: rhoi_rhow !< Ice-to-ocean density ratio [nondim]
+  real,                   intent(in)    :: resid_scale !< Scaling for inner products
+                                                       !! [T3 kg m2 R-1 Z-1 L-4 s-3 ~> 1]
+  real, dimension(8,4,SZDI_(G),SZDJ_(G)), &
+                          intent(in)    :: Phi !< Basis element gradients at quadrature points [L-1 ~> m-1]
+  real, dimension(:,:,:,:,:,:), &
+                          intent(in)    :: Phisub !< Subgridscale quadrature weights [nondim]
+  integer,                intent(out)   :: conv_flag !< Convergence flag: 1=converged, 0=not
+  integer,                intent(out)   :: iters !< The number of iterations used
+  integer,                intent(in)    :: Is_sum !< Starting i-index for global sums
+  integer,                intent(in)    :: Js_sum !< Starting j-index for global sums
+  integer,                intent(in)    :: Ie_sum !< Ending i-index for global sums
+  integer,                intent(in)    :: Je_sum !< Ending j-index for global sums
+  integer,                intent(in)    :: Iscq_sv !< Starting i-index for sum_vec arrays
+  integer,                intent(in)    :: Jscq_sv !< Starting j-index for sum_vec arrays
+
+  real, dimension(SZDIB_(G),SZDJB_(G)) ::  &
+                        Ru, Rv, &         ! Residuals (r) [R L3 Z T-2 ~> m kg s-2]
+                        Zu, Zv, &         ! Preconditioned residuals (z = M^-1 r) [L T-1 ~> m s-1]
+                        Du, Dv, &         ! Search directions (p) [L T-1 ~> m s-1]
+                        Qu, Qv            ! A * p [R L3 Z T-2 ~> m kg s-2]
+  real, dimension(SZDIB_(G),SZDJB_(G),2) :: sum_vec_3d ! Pointwise products for global sums.
+                                ! sum_vec_3d(:,:,1): r^2 [kg2 m2 s-4] or z·q [kg m2 s-3] (context-dependent)
+                                ! sum_vec_3d(:,:,2): z·w or q·(M^-1 q) [kg m2 s-3]
+  real    :: alpha        ! Step length [nondim]
+  real    :: beta         ! Direction update coefficient [nondim]
+  real    :: r_norm_sq    ! Squared residual norm [kg2 m2 s-4]
+  real    :: z_w_sum      ! Inner product (z_k, A z_k); beta denominator [kg m2 s-3]
+  real    :: z_w_sum_new  ! Inner product (z_{k+1}, A z_{k+1}); beta numerator [kg m2 s-3]
+  real    :: z_q_sum      ! Inner product (z_k, A p_k); alpha numerator [kg m2 s-3]
+  real    :: q_s_sum      ! Inner product (A p_k, M^-1 A p_k); alpha denom [kg m2 s-3]
+  real    :: resid0tol2   ! Convergence threshold: tol^2 * ||r_0||^2 [kg2 m2 s-4]
+  real    :: sv3dsum      ! Unused scalar return from reproducing_sum [various]
+  real    :: sv3dsums(2)  ! Component sums from reproducing_sum
+                          ! sv3dsums(1): r^2 or z·q [kg2 m2 s-4 or kg m2 s-3] (context-dependent)
+                          ! sv3dsums(2): z·w or q·M^-1 q [kg m2 s-3]
+  real    :: resid2_scale ! Scaling for squared-stress inner products [T4 kg2 m2 R-2 Z-2 L-6 s-4 ~> 1]
+  integer :: iter, i, j, isc, iec, jsc, jec
+  integer :: Isdq, Iedq, Jsdq, Jedq, Iscq, Iecq, Jscq, Jecq
+
+  Isdq = G%IsdB ; Iedq = G%IedB ; Jsdq = G%JsdB ; Jedq = G%JedB
+  Iscq = G%IscB ; Iecq = G%IecB ; Jscq = G%JscB ; Jecq = G%JecB
+  isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec
+
+  resid2_scale = ((US%RZ_to_kg_m2*US%L_to_m)*US%L_T_to_m_s**2)**2
+
+  ! Initialize CR-specific arrays
+  Ru(:,:) = 0 ; Rv(:,:) = 0 ; Zu(:,:) = 0 ; Zv(:,:) = 0
+  Du(:,:) = 0 ; Dv(:,:) = 0 ; Qu(:,:) = 0 ; Qv(:,:) = 0
+
+  ! r_0 = b - A*x_0
+  Ru(:,:) = (RHSu(:,:) - Au(:,:)) ; Rv(:,:) = (RHSv(:,:) - Av(:,:))
+
+  ! z_0 = M^-1 r_0
   do J=Jsdq,Jedq ; do I=Isdq,Iedq
-      if (CS%umask(I,J) == 3) then
-        u_shlf(I,J) = CS%u_bdry_val(I,J)
-      elseif (CS%umask(I,J) == 0) then
-        u_shlf(I,J) = 0
-      endif
+     if (CS%umask(I,J) == 1) Zu(I,J) = Ru(I,J) * IDIAGu(I,J)
+     if (CS%vmask(I,J) == 1) Zv(I,J) = Rv(I,J) * IDIAGv(I,J)
+  enddo ; enddo
 
-      if (CS%vmask(I,J) == 3) then
-        v_shlf(I,J) = CS%v_bdry_val(I,J)
-      elseif (CS%vmask(I,J) == 0) then
-        v_shlf(I,J) = 0
-      endif
+  ! p_0 = z_0
+  Du(:,:) = Zu(:,:) ; Dv(:,:) = Zv(:,:)
+
+  ! Compute A * z_0
+  Au(:,:) = 0 ; Av(:,:) = 0
+  call CG_action(CS, Au, Av, Zu, Zv, Phi, Phisub, CS%umask, CS%vmask, hmask, &
+                 H_node, CS%ice_visc, float_cond, CS%bed_elev, CS%basal_traction, &
+                 G, US, isc-1, iec+1, jsc-1, jec+1, rhoi_rhow)
+  call pass_vector(Au, Av, G%domain, TO_ALL, BGRID_NE)
+
+  ! q_0 = A * p_0
+  Qu(:,:) = Au(:,:) ; Qv(:,:) = Av(:,:)
+
+  ! Initial Norms
+  sum_vec_3d(:,:,:) = 0.0 ; sv3dsums(1:2) = 0.0
+  do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+    if (CS%umask(I,J) == 1) then
+      sum_vec_3d(I,J,1) = resid2_scale * Ru(I,J)**2
+      sum_vec_3d(I,J,2) = resid_scale  * (Zu(I,J) * Au(I,J))
+    endif
+    if (CS%vmask(I,J) == 1) then
+      sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid2_scale * Rv(I,J)**2
+      sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid_scale  * (Zv(I,J) * Av(I,J))
+    endif
   enddo ; enddo
+  sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
 
-  call pass_vector(u_shlf, v_shlf, G%domain, TO_ALL, BGRID_NE)
-  if (conv_flag == 0) then
-    iters = CS%cg_max_iterations
-  endif
+  r_norm_sq = sv3dsums(1)
+  z_w_sum   = sv3dsums(2)
 
-end subroutine ice_shelf_solve_inner
+  resid0tol2 = CS%cg_tol_newton**2 * r_norm_sq
+  conv_flag = 0
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  !!                                !!
+  !! MAIN CONJUGATE RESIDUAL LOOP   !!
+  !!                                !!
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+  do iter = 1, CS%cg_max_iterations
+
+    ! --- STEP 1: alpha = (z_k, q_k) / (q_k, M^-1 q_k) ---
+    sum_vec_3d(:,:,:) = 0.0 ; sv3dsums(1:2) = 0.0
+    do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+       if (CS%umask(I,J) == 1) then
+           sum_vec_3d(I,J,1) = resid_scale * (Zu(I,J) * Qu(I,J))
+           ! Order matters to prevent float overflow: Q * (Q * IDiag)
+           sum_vec_3d(I,J,2) = resid_scale * (Qu(I,J) * (Qu(I,J) * IDIAGu(I,J)))
+       endif
+       if (CS%vmask(I,J) == 1) then
+           sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid_scale * (Zv(I,J) * Qv(I,J))
+           sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid_scale * (Qv(I,J) * (Qv(I,J) * IDIAGv(I,J)))
+       endif
+    enddo ; enddo
+    sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
+
+    z_q_sum = sv3dsums(1)
+    q_s_sum = sv3dsums(2)
+
+    if (q_s_sum == 0.0) then
+      iters = iter
+      conv_flag = 1
+      exit
+    endif
+    alpha = z_q_sum / q_s_sum
+
+    ! --- STEP 2: Update x, r, and z (Fused over Full Domain) ---
+    ! Zu halos are populated here since the loop covers Jsdq..Jedq; no pass_vector needed.
+    do J=Jsdq,Jedq ; do I=Isdq,Iedq
+       if (CS%umask(I,J) == 1) then
+           u_shlf(I,J) = u_shlf(I,J) + alpha * Du(I,J)
+           Ru(I,J) = Ru(I,J) - alpha * Qu(I,J)
+           Zu(I,J) = Ru(I,J) * IDIAGu(I,J)
+       endif
+       if (CS%vmask(I,J) == 1) then
+           v_shlf(I,J) = v_shlf(I,J) + alpha * Dv(I,J)
+           Rv(I,J) = Rv(I,J) - alpha * Qv(I,J)
+           Zv(I,J) = Rv(I,J) * IDIAGv(I,J)
+       endif
+    enddo ; enddo
+
+    ! --- STEP 3: w_{k+1} = A z_{k+1} ---
+    Au(:,:) = 0 ; Av(:,:) = 0
+    call CG_action(CS, Au, Av, Zu, Zv, Phi, Phisub, CS%umask, CS%vmask, hmask, &
+                   H_node, CS%ice_visc, float_cond, CS%bed_elev, CS%basal_traction, &
+                   G, US, isc-1, iec+1, jsc-1, jec+1, rhoi_rhow)
+    call pass_vector(Au, Av, G%domain, TO_ALL, BGRID_NE)
+
+    ! --- STEP 4: beta and convergence check ---
+    sum_vec_3d(:,:,:) = 0.0 ; sv3dsums(1:2) = 0.0
+    do J=Jscq_sv,Jecq ; do I=Iscq_sv,Iecq
+       if (CS%umask(I,J) == 1) then
+           sum_vec_3d(I,J,1) = resid2_scale * Ru(I,J)**2
+           sum_vec_3d(I,J,2) = resid_scale  * (Zu(I,J) * Au(I,J))
+       endif
+       if (CS%vmask(I,J) == 1) then
+           sum_vec_3d(I,J,1) = sum_vec_3d(I,J,1) + resid2_scale * Rv(I,J)**2
+           sum_vec_3d(I,J,2) = sum_vec_3d(I,J,2) + resid_scale  * (Zv(I,J) * Av(I,J))
+       endif
+    enddo ; enddo
+    sv3dsum = reproducing_sum( sum_vec_3d(:,:,1:2), Is_sum, Ie_sum, Js_sum, Je_sum, sums=sv3dsums(1:2) )
+
+    r_norm_sq = sv3dsums(1)
+    z_w_sum_new = sv3dsums(2)
+
+    if (r_norm_sq <= resid0tol2 .or. z_w_sum==0.0) then
+      iters = iter
+      conv_flag = 1
+      exit
+    endif
+
+    beta = z_w_sum_new / z_w_sum
+    z_w_sum = z_w_sum_new
+
+    ! --- STEP 5: Update p and q ---
+    do J=Jsdq,Jedq ; do I=Isdq,Iedq
+       if (CS%umask(I,J) == 1) then
+           Du(I,J) = Zu(I,J) + beta * Du(I,J)
+           Qu(I,J) = Au(I,J) + beta * Qu(I,J)
+       endif
+       if (CS%vmask(I,J) == 1) then
+           Dv(I,J) = Zv(I,J) + beta * Dv(I,J)
+           Qv(I,J) = Av(I,J) + beta * Qv(I,J)
+       endif
+    enddo ; enddo
+
+  enddo ! end of CR loop
+
+end subroutine ice_shelf_solve_inner_CR
 
 subroutine ice_shelf_advect_thickness_x(CS, G, LB, time_step, hmask, h0, h_after_uflux, uh_ice)
   type(ice_shelf_dyn_CS), intent(in)    :: CS !< A pointer to the ice shelf control structure
@@ -2827,7 +3395,7 @@ subroutine CG_action(CS, uret, vret, u_shlf, v_shlf, Phi, Phisub, umask, vmask,
 
         ! Newton correction for basal drag: compute inner_dot_n once per quadrature point
         if (use_newton) then
-          inner_dot_n = CS%newton_umid(i,j)*uq + CS%newton_vmid(i,j)*vq
+          inner_dot_n = (CS%newton_umid(i,j)*uq) + (CS%newton_vmid(i,j)*vq)
         endif
 
         ! Ratio |J_q|/areaT corrects the uniform-area weight baked into ice_visc for
@@ -2845,13 +3413,13 @@ subroutine CG_action(CS, uret, vret, u_shlf, v_shlf, Phi, Phisub, umask, vmask,
           ! Newton tangent stiffness correction: add (dη/dε_e^2) * (g·δε) * (g·φ_m) term
           if (do_newton_visc) then
             if (umask(Itgt,Jtgt) == 1) uret_qp(iphi,jphi,qp) = uret_qp(iphi,jphi,qp) + &
-             jac_wt * CS%newton_visc_factor(i,j,qpv) * dstrain_n * &
-              ((2.*strx_n + stry_n) * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j) + &
-               strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi),qp,i,j))
+              jac_wt * CS%newton_visc_factor(i,j,qpv) * dstrain_n * &
+              (((2.*strx_n + stry_n) * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j)) + &
+               (strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi),qp,i,j)))
             if (vmask(Itgt,Jtgt) == 1) vret_qp(iphi,jphi,qp) = vret_qp(iphi,jphi,qp) + &
               jac_wt * CS%newton_visc_factor(i,j,qpv) * dstrain_n * &
-              (strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j) + &
-               (2.*stry_n + strx_n) * Phi(2*(2*(jphi-1)+iphi),qp,i,j))
+              ((strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j)) + &
+               ((2.*stry_n + strx_n) * Phi(2*(2*(jphi-1)+iphi),qp,i,j)))
           endif
 
           if (float_cond(i,j) == 0) then
@@ -2940,7 +3508,7 @@ subroutine CG_action(CS, uret, vret, u_shlf, v_shlf, Phi, Phisub, umask, vmask,
 end subroutine CG_action
 
 subroutine CG_action_subgrid_basal(Phisub, H, U, V, bathyT, dens_ratio, Ucontr, Vcontr, &
-                                    dxCv_S, dxCv_N, dyCu_W, dyCu_E, IareaT)
+                                   dxCv_S, dxCv_N, dyCu_W, dyCu_E, IareaT)
   real, dimension(:,:,:,:,:,:), &
                         intent(in)    :: Phisub !< Quadrature structure weights at subgridscale
                                             !! locations for finite element calculations [nondim]
@@ -3003,12 +3571,12 @@ subroutine CG_action_subgrid_basal(Phisub, H, U, V, bathyT, dens_ratio, Ucontr,
       !calculate quadrature point contributions for the sub-cell, to each node
       Ucontr_q(qx,qy) = jac_sub_wt * Phisub(qx,qy,i,j,m,n) * uloc_arr(qx,qy,i,j)
       Vcontr_q(qx,qy) = jac_sub_wt * Phisub(qx,qy,i,j,m,n) * vloc_arr(qx,qy,i,j)
-    enddo; enddo
+    enddo ; enddo
 
     !calculate sub-cell contribution to each node by summing up quadrature point contributions from the sub-cell
     Ucontr_sub(i,j,m,n) = (Ucontr_q(1,1) + Ucontr_q(2,2)) + (Ucontr_q(1,2)+Ucontr_q(2,1))
     Vcontr_sub(i,j,m,n) = (Vcontr_q(1,1) + Vcontr_q(2,2)) + (Vcontr_q(1,2)+Vcontr_q(2,1))
-  enddo; enddo ; enddo ; enddo
+  enddo ; enddo ; enddo ; enddo
 
   !sum up the sub-cell contributions to each node
   do n=1,2 ; do m=1,2
@@ -3170,8 +3738,8 @@ subroutine matrix_diagonal(CS, G, US, float_cond, H_node, ice_visc, basal_trac,
           ! Newton viscosity diagonal correction: newton_visc_factor * (g . grad_phi_m_u)^2
           ! where grad_phi_m_u = [(2*strx+stry)*Phi_xm + strsh/2*Phi_ym] for u-DOF at node m
           if (do_newton_visc) then
-            dstrain_diag_u = (2.*strx_n + stry_n) * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j) + &
-                             strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi),qp,i,j)
+            dstrain_diag_u = ((2.*strx_n + stry_n) * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j)) + &
+                             (strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi),qp,i,j))
             u_diag_qp(iphi,jphi,qp) = u_diag_qp(iphi,jphi,qp) + &
               jac_wt * CS%newton_visc_factor(i,j,qpv) * dstrain_diag_u**2
           endif
@@ -3201,8 +3769,8 @@ subroutine matrix_diagonal(CS, G, US, float_cond, H_node, ice_visc, basal_trac,
 
           ! Newton viscosity diagonal correction for v-DOF: uses [strsh/2*Phi_xm + (2*stry+strx)*Phi_ym]
           if (do_newton_visc) then
-            dstrain_diag_v = strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j) + &
-                             (2.*stry_n + strx_n) * Phi(2*(2*(jphi-1)+iphi),qp,i,j)
+            dstrain_diag_v = (strsh_n * 0.5 * Phi(2*(2*(jphi-1)+iphi)-1,qp,i,j)) + &
+                             ((2.*stry_n + strx_n) * Phi(2*(2*(jphi-1)+iphi),qp,i,j))
             v_diag_qp(iphi,jphi,qp) = v_diag_qp(iphi,jphi,qp) + &
               jac_wt * CS%newton_visc_factor(i,j,qpv) * dstrain_diag_v**2
           endif
@@ -3283,8 +3851,8 @@ subroutine matrix_diagonal(CS, G, US, float_cond, H_node, ice_visc, basal_trac,
 
 end subroutine matrix_diagonal
 
-subroutine CG_diagonal_subgrid_basal (Phisub, H_node, bathyT, dens_ratio, f_grnd, &
-                                       dxCv_S, dxCv_N, dyCu_W, dyCu_E, IareaT)
+subroutine CG_diagonal_subgrid_basal(Phisub, H_node, bathyT, dens_ratio, f_grnd, &
+                                     dxCv_S, dxCv_N, dyCu_W, dyCu_E, IareaT)
   real, dimension(:,:,:,:,:,:), &
                         intent(in) :: Phisub !< Quadrature structure weights at subgridscale
                                              !! locations for finite element calculations [nondim]