diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl
index 997b94faf10..9bbba70ea66 100644
--- a/src/auxiliary/precompile.jl
+++ b/src/auxiliary/precompile.jl
@@ -309,16 +309,16 @@ function _precompile_manual_()
     Base.precompile(Tuple{Type{LobattoLegendreBasis}, Int})
     for RealT in (Float64,)
         Base.precompile(Tuple{Type{LobattoLegendreBasis}, RealT, Int})
-        @assert Base.precompile(Tuple{typeof(Trixi.calc_dhat), Vector{RealT},
+        @assert Base.precompile(Tuple{typeof(Trixi.calc_Dhat), Vector{RealT},
                                       Vector{RealT}})
-        @assert Base.precompile(Tuple{typeof(Trixi.calc_dsplit), Vector{RealT},
+        @assert Base.precompile(Tuple{typeof(Trixi.calc_Dsplit), Vector{RealT},
                                       Vector{RealT}})
         @assert Base.precompile(Tuple{typeof(Trixi.polynomial_derivative_matrix),
                                       Vector{RealT}})
         @assert Base.precompile(Tuple{typeof(Trixi.polynomial_interpolation_matrix),
                                       Vector{RealT}, Vector{RealT}})
         @assert Base.precompile(Tuple{typeof(Trixi.barycentric_weights), Vector{RealT}})
-        @assert Base.precompile(Tuple{typeof(Trixi.calc_lhat), RealT, Vector{RealT},
+        @assert Base.precompile(Tuple{typeof(Trixi.calc_Lhat), RealT, Vector{RealT},
                                       Vector{RealT}})
         @assert Base.precompile(Tuple{typeof(Trixi.lagrange_interpolating_polynomials),
                                       RealT, Vector{RealT}, Vector{RealT}})
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index bfcc09acf5a..173bfb17f48 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -866,9 +866,7 @@ end
         #     In fact, everything can be fast and fine for many cases but some parts
         #     of the RHS evaluation can take *exactly* (!) five seconds randomly...
         #     Hence, this version should only be used when `@threaded` is based on
-        #     `@batch` from Polyester.jl or something similar. Using Polyester.jl
-        #     is probably the best option since everything will be handed over to
-        #     Chris Elrod, one of the best performance software engineers for Julia.
+        #     `@batch` from Polyester.jl or something similar.
         PtrArray(pointer(u_ode),
                  (StaticInt(nvariables(equations)),
                   ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))...,
diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl
index 7db22c2f271..02313b6397e 100644
--- a/src/solvers/dgsem/basis_lobatto_legendre.jl
+++ b/src/solvers/dgsem/basis_lobatto_legendre.jl
@@ -25,13 +25,12 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES,
     inverse_weights::VectorT
 
     inverse_vandermonde_legendre::InverseVandermondeLegendre
-    boundary_interpolation::BoundaryMatrix # lhat
+    boundary_interpolation::BoundaryMatrix # Compressed form of "Lhat"
 
-    derivative_matrix::DerivativeMatrix # strong form derivative matrix
+    derivative_matrix::DerivativeMatrix # strong form derivative matrix "D"
     derivative_split::DerivativeMatrix # strong form derivative matrix minus boundary terms
     derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split`
-    derivative_dhat::DerivativeMatrix # weak form matrix "dhat",
-    # negative adjoint wrt the SBP dot product
+    derivative_hat::DerivativeMatrix # weak form matrix "Dhat", negative adjoint wrt the SBP dot product
 end
 
 function Adapt.adapt_structure(to, basis::LobattoLegendreBasis)
@@ -45,7 +44,7 @@ function Adapt.adapt_structure(to, basis::LobattoLegendreBasis)
     derivative_matrix = adapt(to, basis.derivative_matrix)
     derivative_split = adapt(to, basis.derivative_split)
     derivative_split_transpose = adapt(to, basis.derivative_split_transpose)
-    derivative_dhat = adapt(to, basis.derivative_dhat)
+    derivative_hat = adapt(to, basis.derivative_hat)
     return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes),
                                 typeof(inverse_vandermonde_legendre),
                                 typeof(boundary_interpolation),
@@ -57,7 +56,7 @@ function Adapt.adapt_structure(to, basis::LobattoLegendreBasis)
                                                            derivative_matrix,
                                                            derivative_split,
                                                            derivative_split_transpose,
-                                                           derivative_dhat)
+                                                           derivative_hat)
 end
 
 function LobattoLegendreBasis(RealT, polydeg::Integer)
@@ -69,13 +68,13 @@ function LobattoLegendreBasis(RealT, polydeg::Integer)
     _, inverse_vandermonde_legendre = vandermonde_legendre(nodes_, RealT)
 
     boundary_interpolation = zeros(RealT, nnodes_, 2)
-    boundary_interpolation[:, 1] = calc_lhat(-one(RealT), nodes_, weights_)
-    boundary_interpolation[:, 2] = calc_lhat(one(RealT), nodes_, weights_)
+    boundary_interpolation[:, 1] = calc_Lhat(-one(RealT), nodes_, weights_)
+    boundary_interpolation[:, 2] = calc_Lhat(one(RealT), nodes_, weights_)
 
     derivative_matrix = polynomial_derivative_matrix(nodes_)
-    derivative_split = calc_dsplit(nodes_, weights_)
+    derivative_split = calc_Dsplit(nodes_, weights_)
     derivative_split_transpose = Matrix(derivative_split')
-    derivative_dhat = calc_dhat(nodes_, weights_)
+    derivative_hat = calc_Dhat(nodes_, weights_)
 
     # Type conversions to enable possible optimizations of runtime performance
     # and latency
@@ -98,7 +97,7 @@ function LobattoLegendreBasis(RealT, polydeg::Integer)
                                                            derivative_matrix,
                                                            derivative_split,
                                                            derivative_split_transpose,
-                                                           derivative_dhat)
+                                                           derivative_hat)
 end
 LobattoLegendreBasis(polydeg::Integer) = LobattoLegendreBasis(Float64, polydeg)
 
@@ -410,45 +409,50 @@ end
 
 # TODO: Taal refactor, allow other RealT below and adapt constructors above accordingly
 
-# Calculate the Dhat matrix
-function calc_dhat(nodes, weights)
+# Calculate the Dhat matrix = -M^{-1} D^T M for weak form differentiation.
+# Note that this is the negated version of the matrix that shows up on the RHS of the
+# DG update multiplying the physical flux evaluations.
+function calc_Dhat(nodes, weights)
     n_nodes = length(nodes)
-    dhat = Matrix(polynomial_derivative_matrix(nodes)')
+    Dhat = Matrix(polynomial_derivative_matrix(nodes)')
 
+    # Perform M matrix multplicaitons and negate
     for n in 1:n_nodes, j in 1:n_nodes
-        dhat[j, n] *= -weights[n] / weights[j]
+        Dhat[j, n] *= -weights[n] / weights[j]
     end
 
-    return dhat
+    return Dhat
 end
 
-# Calculate the Dsplit matrix for split-form differentiation: dplit = 2D - M⁻¹B
-function calc_dsplit(nodes, weights)
+# Calculate the Dsplit matrix for split-form differentiation: Dsplit = 2D - M⁻¹B
+# Note that this is the negated version of the matrix that shows up on the RHS of the 
+# DG update multiplying the two-point numerical volume flux evaluations.
+function calc_Dsplit(nodes, weights)
     # Start with 2 x the normal D matrix
-    dsplit = 2 .* polynomial_derivative_matrix(nodes)
+    Dsplit = 2 .* polynomial_derivative_matrix(nodes)
 
-    # Modify to account for
-    dsplit[1, 1] += 1 / weights[1]
-    dsplit[end, end] -= 1 / weights[end]
+    # Modify to account for the weighted boundary terms
+    Dsplit[1, 1] += 1 / weights[1] # B[1, 1] = -1
+    Dsplit[end, end] -= 1 / weights[end] # B[end, end] = 1
 
-    return dsplit
+    return Dsplit
 end
 
 # Calculate the polynomial derivative matrix D.
 # This implements algorithm 37 "PolynomialDerivativeMatrix" from Kopriva's book.
 function polynomial_derivative_matrix(nodes)
     n_nodes = length(nodes)
-    d = zeros(eltype(nodes), n_nodes, n_nodes)
+    D = zeros(eltype(nodes), n_nodes, n_nodes)
     wbary = barycentric_weights(nodes)
 
     for i in 1:n_nodes, j in 1:n_nodes
         if j != i
-            d[i, j] = (wbary[j] / wbary[i]) * 1 / (nodes[i] - nodes[j])
-            d[i, i] -= d[i, j]
+            D[i, j] = (wbary[j] / wbary[i]) * 1 / (nodes[i] - nodes[j])
+            D[i, i] -= D[i, j]
         end
     end
 
-    return d
+    return D
 end
 
 # Calculate and interpolation matrix (Vandermonde matrix) between two given sets of nodes
@@ -525,18 +529,19 @@ function barycentric_weights(nodes)
     return weights
 end
 
-# Calculate Lhat.
-function calc_lhat(x, nodes, weights)
+# Calculate M^{-1} * L(x), where L(x) is the Lagrange polynomial
+# vector at point x.
+function calc_Lhat(x, nodes, weights)
     n_nodes = length(nodes)
     wbary = barycentric_weights(nodes)
 
-    lhat = lagrange_interpolating_polynomials(x, nodes, wbary)
+    Lhat = lagrange_interpolating_polynomials(x, nodes, wbary)
 
     for i in 1:n_nodes
-        lhat[i] /= weights[i]
+        Lhat[i] /= weights[i]
     end
 
-    return lhat
+    return Lhat
 end
 
 """
diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl
index cdef079a860..71cf57f60aa 100644
--- a/src/solvers/dgsem_p4est/dg_2d.jl
+++ b/src/solvers/dgsem_p4est/dg_2d.jl
@@ -765,7 +765,10 @@ function calc_surface_integral!(du, u,
     @unpack surface_flux_values = cache.elements
 
     # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
index 346a6fc886d..96749522582 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
@@ -367,7 +367,7 @@ function calc_volume_integral!(du, flux_viscous,
                                mesh::P4estMesh{2},
                                equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-    (; derivative_dhat) = dg.basis
+    (; derivative_hat) = dg.basis
     (; contravariant_vectors) = cache.elements
     flux_viscous_x, flux_viscous_y = flux_viscous
 
@@ -385,7 +385,7 @@ function calc_volume_integral!(du, flux_viscous,
                                                   i, j, element)
             contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(du, derivative_hat[ii, i],
                                            contravariant_flux1,
                                            equations_parabolic, dg, ii, j, element)
             end
@@ -396,7 +396,7 @@ function calc_volume_integral!(du, flux_viscous,
                                                   i, j, element)
             contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(du, derivative_hat[jj, j],
                                            contravariant_flux2,
                                            equations_parabolic, dg, i, jj, element)
             end
@@ -772,7 +772,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                         mesh::P4estMesh{2}, # for dispatch only
                                         equations_parabolic::AbstractEquationsParabolic,
                                         dg::DG, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     @unpack contravariant_vectors = cache.elements
     gradients_x, gradients_y = gradients
 
@@ -784,13 +784,13 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                    i, j, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(gradients_x, derivative_hat[ii, i],
                                            u_node, equations_parabolic, dg,
                                            ii, j, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(gradients_y, derivative_hat[jj, j],
                                            u_node, equations_parabolic, dg,
                                            i, jj, element)
             end
@@ -964,7 +964,6 @@ function calc_gradient_surface_integral!(gradients,
 
     gradients_x, gradients_y = gradients
 
-    # Access the factors only once before beginning the loop to increase performance.
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl
index f4efd03ed00..0f1b0d55d9a 100644
--- a/src/solvers/dgsem_p4est/dg_3d.jl
+++ b/src/solvers/dgsem_p4est/dg_3d.jl
@@ -935,7 +935,10 @@ function calc_surface_integral!(du, u,
     @unpack surface_flux_values = cache.elements
 
     # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
index 14fa38d3ac2..2b35a84d189 100644
--- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl
@@ -206,7 +206,7 @@ function calc_volume_integral!(du, flux_viscous,
                                mesh::P4estMesh{3},
                                equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-    (; derivative_dhat) = dg.basis
+    (; derivative_hat) = dg.basis
     (; contravariant_vectors) = cache.elements
     flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous
 
@@ -226,7 +226,7 @@ function calc_volume_integral!(du, flux_viscous,
                                                         i, j, k, element)
             contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(du, derivative_hat[ii, i],
                                            contravariant_flux1,
                                            equations_parabolic, dg, ii, j, k, element)
             end
@@ -237,7 +237,7 @@ function calc_volume_integral!(du, flux_viscous,
                                                         i, j, k, element)
             contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(du, derivative_hat[jj, j],
                                            contravariant_flux2,
                                            equations_parabolic, dg, i, jj, k, element)
             end
@@ -248,7 +248,7 @@ function calc_volume_integral!(du, flux_viscous,
                                                         i, j, k, element)
             contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3
             for kk in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[kk, k],
+                multiply_add_to_node_vars!(du, derivative_hat[kk, k],
                                            contravariant_flux3,
                                            equations_parabolic, dg, i, j, kk, element)
             end
@@ -695,7 +695,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                         mesh::P4estMesh{3}, # for dispatch only
                                         equations_parabolic::AbstractEquationsParabolic,
                                         dg::DG, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     @unpack contravariant_vectors = cache.elements
     gradients_x, gradients_y, gradients_z = gradients
 
@@ -707,19 +707,19 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                    i, j, k, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(gradients_x, derivative_hat[ii, i],
                                            u_node, equations_parabolic, dg,
                                            ii, j, k, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(gradients_y, derivative_hat[jj, j],
                                            u_node, equations_parabolic, dg,
                                            i, jj, k, element)
             end
 
             for kk in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k],
+                multiply_add_to_node_vars!(gradients_z, derivative_hat[kk, k],
                                            u_node, equations_parabolic, dg,
                                            i, j, kk, element)
             end
diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl
index cb98c45aed3..672dbe65ebf 100644
--- a/src/solvers/dgsem_structured/dg_1d.jl
+++ b/src/solvers/dgsem_structured/dg_1d.jl
@@ -75,6 +75,9 @@ function apply_jacobian!(du, mesh::StructuredMesh{1},
 
     @threaded for element in eachelement(dg, cache)
         for i in eachnode(dg)
+            # Negative sign included to account for the negated surface and volume terms,
+            # see e.g. the computation of `derivative_hat` in the basis setup and 
+            # the comment in `calc_surface_integral!`.
             factor = -inverse_jacobian[i, element]
 
             for v in eachvariable(equations)
diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl
index 043d7ca019b..75dbfe9fe71 100644
--- a/src/solvers/dgsem_structured/dg_2d.jl
+++ b/src/solvers/dgsem_structured/dg_2d.jl
@@ -36,7 +36,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                    dg::DGSEM, cache, alpha = true)
     # true * [some floating point value] == [exactly the same floating point value]
     # This can (hopefully) be optimized away due to constant propagation.
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     @unpack contravariant_vectors = cache.elements
 
     for j in eachnode(dg), i in eachnode(dg)
@@ -50,7 +50,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
         Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element)
         contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2
         for ii in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i],
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[ii, i],
                                        contravariant_flux1, equations, dg,
                                        ii, j, element)
         end
@@ -60,7 +60,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
         Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element)
         contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2
         for jj in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j],
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[jj, j],
                                        contravariant_flux2, equations, dg,
                                        i, jj, element)
         end
@@ -723,6 +723,9 @@ function apply_jacobian!(du,
 
     @threaded for element in eachelement(dg, cache)
         for j in eachnode(dg), i in eachnode(dg)
+            # Negative sign included to account for the negated surface and volume terms,
+            # see e.g. the computation of `derivative_hat` in the basis setup and 
+            # the comment in `calc_surface_integral!`.
             factor = -inverse_jacobian[i, j, element]
 
             for v in eachvariable(equations)
diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl
index e6590ec4cb9..7de87ad75e6 100644
--- a/src/solvers/dgsem_structured/dg_3d.jl
+++ b/src/solvers/dgsem_structured/dg_3d.jl
@@ -38,7 +38,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                    dg::DGSEM, cache, alpha = true)
     # true * [some floating point value] == [exactly the same floating point value]
     # This can (hopefully) be optimized away due to constant propagation.
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     @unpack contravariant_vectors = cache.elements
 
     for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
@@ -54,7 +54,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                                     i, j, k, element)
         contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3
         for ii in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i],
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[ii, i],
                                        contravariant_flux1, equations, dg,
                                        ii, j, k, element)
         end
@@ -65,7 +65,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                                     i, j, k, element)
         contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3
         for jj in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j],
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[jj, j],
                                        contravariant_flux2, equations, dg,
                                        i, jj, k, element)
         end
@@ -76,7 +76,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                                     i, j, k, element)
         contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3
         for kk in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k],
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[kk, k],
                                        contravariant_flux3, equations, dg,
                                        i, j, kk, element)
         end
@@ -818,9 +818,14 @@ end
 function apply_jacobian!(du,
                          mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
                          equations, dg::DG, cache)
+    @unpack inverse_jacobian = cache.elements
+
     @threaded for element in eachelement(dg, cache)
         for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
-            factor = -cache.elements.inverse_jacobian[i, j, k, element]
+            # Negative sign included to account for the negated surface and volume terms,
+            # see e.g. the computation of `derivative_hat` in the basis setup and 
+            # the comment in `calc_surface_integral!`.
+            factor = -inverse_jacobian[i, j, k, element]
 
             for v in eachvariable(equations)
                 du[v, i, j, k, element] *= factor
diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl
index bd3211910a5..7cba51f1247 100644
--- a/src/solvers/dgsem_tree/dg_1d.jl
+++ b/src/solvers/dgsem_tree/dg_1d.jl
@@ -122,14 +122,14 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                    dg::DGSEM, cache, alpha = true)
     # true * [some floating point value] == [exactly the same floating point value]
     # This can (hopefully) be optimized away due to constant propagation.
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
 
     for i in eachnode(dg)
         u_node = get_node_vars(u, equations, dg, i, element)
 
         flux1 = flux(u_node, 1, equations)
         for ii in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[ii, i], flux1,
                                        equations, dg, ii, element)
         end
     end
@@ -590,8 +590,10 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1
     @unpack boundary_interpolation = dg.basis
     @unpack surface_flux_values = cache.elements
 
-    # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
@@ -616,6 +618,9 @@ function apply_jacobian!(du, mesh::TreeMesh{1},
     @unpack inverse_jacobian = cache.elements
 
     @threaded for element in eachelement(dg, cache)
+        # Negative sign included to account for the negated surface and volume terms,
+        # see e.g. the computation of `derivative_hat` in the basis setup and 
+        # the comment in `calc_surface_integral!`.
         factor = -inverse_jacobian[element]
 
         for i in eachnode(dg)
diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl
index 9b1149f5826..2afdfb07650 100644
--- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl
+++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl
@@ -153,7 +153,7 @@ function calc_volume_integral!(du, flux_viscous,
                                mesh::TreeMesh{1},
                                equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
 
     @threaded for element in eachelement(dg, cache)
         # Calculate volume terms in one element
@@ -162,7 +162,7 @@ function calc_volume_integral!(du, flux_viscous,
                                         element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node,
+                multiply_add_to_node_vars!(du, derivative_hat[ii, i], flux_1_node,
                                            equations_parabolic, dg, ii, element)
             end
         end
@@ -388,7 +388,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                         mesh::TreeMesh{1}, # for dispatch only
                                         equations_parabolic::AbstractEquationsParabolic,
                                         dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
 
     @threaded for element in eachelement(dg, cache)
         # Calculate volume terms in one element,
@@ -398,7 +398,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                    i, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(gradients, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(gradients, derivative_hat[ii, i],
                                            u_node, equations_parabolic, dg,
                                            ii, element)
             end
@@ -449,7 +449,6 @@ function calc_gradient_surface_integral!(gradients,
     @unpack surface_flux_values = cache.elements
 
     # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl
index d59061fd77f..ff75acb3773 100644
--- a/src/solvers/dgsem_tree/dg_2d.jl
+++ b/src/solvers/dgsem_tree/dg_2d.jl
@@ -182,7 +182,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                    dg::DGSEM, cache, alpha = true)
     # true * [some floating point value] == [exactly the same floating point value]
     # This can (hopefully) be optimized away due to constant propagation.
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
 
     # Calculate volume terms in one element
     for j in eachnode(dg), i in eachnode(dg)
@@ -190,13 +190,13 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
 
         flux1 = flux(u_node, 1, equations)
         for ii in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[ii, i], flux1,
                                        equations, dg, ii, j, element)
         end
 
         flux2 = flux(u_node, 2, equations)
         for jj in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[jj, j], flux2,
                                        equations, dg, i, jj, element)
         end
     end
@@ -1086,8 +1086,10 @@ function calc_surface_integral!(du, u,
     @unpack boundary_interpolation = dg.basis
     @unpack surface_flux_values = cache.elements
 
-    # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
@@ -1126,6 +1128,9 @@ function apply_jacobian!(du, mesh::TreeMesh{2},
     @unpack inverse_jacobian = cache.elements
 
     @threaded for element in eachelement(dg, cache)
+        # Negative sign included to account for the negated surface and volume terms,
+        # see e.g. the computation of `derivative_hat` in the basis setup and 
+        # the comment in `calc_surface_integral!`.
         factor = -inverse_jacobian[element]
 
         for j in eachnode(dg), i in eachnode(dg)
diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl
index 941d71d629e..bba7096c61e 100644
--- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl
@@ -173,7 +173,7 @@ function calc_volume_integral!(du, flux_viscous,
                                mesh::TreeMesh{2},
                                equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     flux_viscous_x, flux_viscous_y = flux_viscous
 
     @threaded for element in eachelement(dg, cache)
@@ -185,12 +185,12 @@ function calc_volume_integral!(du, flux_viscous,
                                         i, j, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node,
+                multiply_add_to_node_vars!(du, derivative_hat[ii, i], flux_1_node,
                                            equations_parabolic, dg, ii, j, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node,
+                multiply_add_to_node_vars!(du, derivative_hat[jj, j], flux_2_node,
                                            equations_parabolic, dg, i, jj, element)
             end
         end
@@ -803,7 +803,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                         mesh::TreeMesh{2}, # for dispatch only
                                         equations_parabolic::AbstractEquationsParabolic,
                                         dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     gradients_x, gradients_y = gradients
 
     @threaded for element in eachelement(dg, cache)
@@ -814,13 +814,13 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                    i, j, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(gradients_x, derivative_hat[ii, i],
                                            u_node, equations_parabolic, dg,
                                            ii, j, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(gradients_y, derivative_hat[jj, j],
                                            u_node, equations_parabolic, dg,
                                            i, jj, element)
             end
@@ -876,7 +876,6 @@ function calc_gradient_surface_integral!(gradients,
     gradients_x, gradients_y = gradients
 
     # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl
index 902f7b9f1e4..f945aff01f9 100644
--- a/src/solvers/dgsem_tree/dg_3d.jl
+++ b/src/solvers/dgsem_tree/dg_3d.jl
@@ -131,26 +131,26 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17
                                    dg::DGSEM, cache, alpha = true)
     # true * [some floating point value] == [exactly the same floating point value]
     # This can (hopefully) be optimized away due to constant propagation.
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
 
     for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
         u_node = get_node_vars(u, equations, dg, i, j, k, element)
 
         flux1 = flux(u_node, 1, equations)
         for ii in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[ii, i], flux1,
                                        equations, dg, ii, j, k, element)
         end
 
         flux2 = flux(u_node, 2, equations)
         for jj in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[jj, j], flux2,
                                        equations, dg, i, jj, k, element)
         end
 
         flux3 = flux(u_node, 3, equations)
         for kk in eachnode(dg)
-            multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], flux3,
+            multiply_add_to_node_vars!(du, alpha * derivative_hat[kk, k], flux3,
                                        equations, dg, i, j, kk, element)
         end
     end
@@ -1337,7 +1337,10 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3
     @unpack boundary_interpolation = dg.basis
     @unpack surface_flux_values = cache.elements
 
-    # Access the factors only once before beginning the loop to increase performance.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
     # We also use explicit assignments instead of `+=` and `-=` to let `@muladd`
     # turn these into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
@@ -1389,6 +1392,9 @@ function apply_jacobian!(du, mesh::TreeMesh{3},
     @unpack inverse_jacobian = cache.elements
 
     @threaded for element in eachelement(dg, cache)
+        # Negative sign included to account for the negated surface and volume terms,
+        # see e.g. the computation of `derivative_hat` in the basis setup and 
+        # the comment in `calc_surface_integral!`.
         factor = -inverse_jacobian[element]
 
         for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl
index 9a9a5296a4f..8af74734baf 100644
--- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl
+++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl
@@ -58,7 +58,7 @@ function calc_volume_integral!(du, flux_viscous,
                                mesh::TreeMesh{3},
                                equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous
 
     @threaded for element in eachelement(dg, cache)
@@ -72,17 +72,17 @@ function calc_volume_integral!(du, flux_viscous,
                                         i, j, k, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node,
+                multiply_add_to_node_vars!(du, derivative_hat[ii, i], flux_1_node,
                                            equations_parabolic, dg, ii, j, k, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node,
+                multiply_add_to_node_vars!(du, derivative_hat[jj, j], flux_2_node,
                                            equations_parabolic, dg, i, jj, k, element)
             end
 
             for kk in eachnode(dg)
-                multiply_add_to_node_vars!(du, derivative_dhat[kk, k], flux_3_node,
+                multiply_add_to_node_vars!(du, derivative_hat[kk, k], flux_3_node,
                                            equations_parabolic, dg, i, j, kk, element)
             end
         end
@@ -938,7 +938,7 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                         mesh::TreeMesh{3}, # for dispatch only
                                         equations_parabolic::AbstractEquationsParabolic,
                                         dg::DGSEM, cache)
-    @unpack derivative_dhat = dg.basis
+    @unpack derivative_hat = dg.basis
     gradients_x, gradients_y, gradients_z = gradients
 
     @threaded for element in eachelement(dg, cache)
@@ -949,19 +949,19 @@ function calc_gradient_volume_integral!(gradients, u_transformed,
                                    i, j, k, element)
 
             for ii in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i],
+                multiply_add_to_node_vars!(gradients_x, derivative_hat[ii, i],
                                            u_node, equations_parabolic, dg,
                                            ii, j, k, element)
             end
 
             for jj in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j],
+                multiply_add_to_node_vars!(gradients_y, derivative_hat[jj, j],
                                            u_node, equations_parabolic, dg,
                                            i, jj, k, element)
             end
 
             for kk in eachnode(dg)
-                multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k],
+                multiply_add_to_node_vars!(gradients_z, derivative_hat[kk, k],
                                            u_node, equations_parabolic, dg,
                                            i, j, kk, element)
             end
@@ -1019,7 +1019,6 @@ function calc_gradient_surface_integral!(gradients,
     gradients_x, gradients_y, gradients_z = gradients
 
     # Note that all fluxes have been computed with outward-pointing normal vectors.
-    # Access the factors only once before beginning the loop to increase performance.
     # We also use explicit assignments instead of `+=` to let `@muladd` turn these
     # into FMAs (see comment at the top of the file).
     factor_1 = boundary_interpolation[1, 1]
diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl
index b872bcb304b..8a498b5deb8 100644
--- a/src/solvers/dgsem_unstructured/dg_2d.jl
+++ b/src/solvers/dgsem_unstructured/dg_2d.jl
@@ -482,22 +482,31 @@ function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D,
     @unpack boundary_interpolation = dg.basis
     @unpack surface_flux_values = cache.elements
 
+    # Note that all fluxes have been computed with outward-pointing normal vectors.
+    # This computes the **negative** surface integral contribution,
+    # i.e., M^{-1} * boundary_interpolation^T (which is for DGSEM just M^{-1} * B)
+    # and the missing "-" is taken care of by `apply_jacobian!`.
+    #
+    # We also use explicit assignments instead of `+=` to let `@muladd` turn these
+    # into FMAs (see comment at the top of the file).
+    factor_1 = boundary_interpolation[1, 1]
+    factor_2 = boundary_interpolation[nnodes(dg), 2]
     @threaded for element in eachelement(dg, cache)
         for l in eachnode(dg), v in eachvariable(equations)
             # surface contribution along local sides 2 and 4 (fixed x and y varies)
-            du[v, 1, l, element] += (surface_flux_values[v, l, 4, element]
-                                     *
-                                     boundary_interpolation[1, 1])
-            du[v, nnodes(dg), l, element] += (surface_flux_values[v, l, 2, element]
-                                              *
-                                              boundary_interpolation[nnodes(dg), 2])
+            du[v, 1, l, element] = du[v, 1, l, element] +
+                                   surface_flux_values[v, l, 4, element] *
+                                   factor_1
+            du[v, nnodes(dg), l, element] = du[v, nnodes(dg), l, element] +
+                                            surface_flux_values[v, l, 2, element] *
+                                            factor_2
             # surface contribution along local sides 1 and 3 (fixed y and x varies)
-            du[v, l, 1, element] += (surface_flux_values[v, l, 1, element]
-                                     *
-                                     boundary_interpolation[1, 1])
-            du[v, l, nnodes(dg), element] += (surface_flux_values[v, l, 3, element]
-                                              *
-                                              boundary_interpolation[nnodes(dg), 2])
+            du[v, l, 1, element] = du[v, l, 1, element] +
+                                   surface_flux_values[v, l, 1, element] *
+                                   factor_1
+            du[v, l, nnodes(dg), element] = du[v, l, nnodes(dg), element] +
+                                            surface_flux_values[v, l, 3, element] *
+                                            factor_2
         end
     end