diff --git a/src/Trixi.jl b/src/Trixi.jl index 167e01189c8..57eb76ef2f6 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -25,11 +25,12 @@ const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", tru # (standard library packages first, other packages next, all of them sorted alphabetically) using Accessors: @reset -using LinearAlgebra: LinearAlgebra, Diagonal, diag, dot, eigvals, mul!, norm, cross, +using LinearAlgebra: LinearAlgebra, Adjoint, Diagonal, diag, dot, eigvals, mul!, norm, + cross, normalize, I, UniformScaling, det using Printf: @printf, @sprintf, println -using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, droptol!, +using SparseArrays: SparseMatrixCSC, AbstractSparseMatrix, sparse, droptol!, rowvals, nzrange, nonzeros # import @reexport now to make it available for further imports/exports diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 3d7aba9536f..d4779911cee 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -44,6 +44,22 @@ end @inline nelements(dg::DGMulti, cache) = size(cache.solution_container.u_values)[end] +# Returns the components needed to iterate efficiently over the entries of either a +# `SparseMatrixCSC` or `Adjoint{SparseMatrixCSC}`, for example when performing flux +# differencing calculations. +# +# For `Adjoint{SparseMatrixCSC}` (used by `DGMultiFluxDiff`), since `parent(A)` is a +# `SparseMatrixCSC` stored in column-major order, iterating over its columns gives +# row-major access to `A`. +# +# For `SparseMatrixCSC` (used by `DGMultiPeriodicFDSBP`, for example), `parent(A)` +# simply returns `A`. +@inline function sparse_operator_data(A::Union{<:SparseMatrixCSC, + <:Adjoint{<:Any, <:SparseMatrixCSC}}) + A_base = parent(A) + return A_base, axes(A, 2), rowvals(A_base), nonzeros(A_base) +end + """ eachdim(mesh) @@ -370,7 +386,7 @@ function prolong2interfaces!(cache, u, end # CARE: This function requires that interpolation to quadrature points is performed before -# to populate cache.u_values, see `calc_volume_integral!` for `VolumeIntegralWeakForm`. +# to populate cache.solution_container.u_values, see `calc_volume_integral!` for `VolumeIntegralWeakForm`. # version for affine meshes @inline function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, @@ -398,7 +414,7 @@ end end # CARE: This function requires that interpolation to quadrature points is performed before -# to populate cache.u_values, see `calc_volume_integral!` for `VolumeIntegralWeakForm`. +# to populate cache.solution_container.u_values, see `calc_volume_integral!` for `VolumeIntegralWeakForm`. # version for curved meshes @inline function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh{NDIMS, <:NonAffine}, @@ -517,7 +533,7 @@ function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, return nothing end -# assumes cache.flux_face_values is computed and filled with +# assumes cache.solution_container.flux_face_values is computed and filled with # for polynomial discretizations, use dense LIFT matrix for surface contributions. function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 8d056fe1f36..3ba81d36ae7 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -5,234 +5,6 @@ @muladd begin #! format: noindent -# hadamard_sum!(du, A, -# flux_is_symmetric, volume_flux, -# orientation_or_normal_direction, u, equations) -# -# Computes the flux difference ∑_j A[i, j] * f(u_i, u_j) and accumulates the result into `du`. -# Called by `local_flux_differencing` to compute local contributions to flux differencing -# volume integrals. -# -# - `du`, `u` are vectors -# - `A` is the skew-symmetric flux differencing matrix -# - `flux_is_symmetric` is a `Val{<:Bool}` indicating if f(u_i, u_j) = f(u_j, u_i) -# -# The matrix `A` can be either dense or sparse. In the latter case, you should -# use the `adjoint` of a `SparseMatrixCSC` to mimic a `SparseMatrixCSR`, which -# is more efficient for matrix vector products. - -# Version for dense operators and symmetric fluxes -@inline function hadamard_sum!(du, A, - flux_is_symmetric::True, volume_flux, - orientation_or_normal_direction, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - AF_ij = 2 * A[i, j] * - volume_flux(u_i, u_j, orientation_or_normal_direction, - equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end - end - du[i] = du_i - end -end - -# Version for dense operators and non-symmetric fluxes -@inline function hadamard_sum!(du, A, - flux_is_symmetric::False, volume_flux, - orientation::Integer, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - f_ij = volume_flux(u_i, u_j, orientation, equations) - du_i = du_i + 2 * A[i, j] * f_ij - end - du[i] = du_i - end -end - -@inline function hadamard_sum!(du, A, - flux_is_symmetric::False, volume_flux, - normal_direction::AbstractVector, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - f_ij = volume_flux(u_i, u_j, normal_direction, equations) - du_i = du_i + 2 * A[i, j] * f_ij - end - du[i] = du_i - end -end - -# Version for sparse operators and symmetric fluxes -@inline function hadamard_sum!(du, - A::LinearAlgebra.Adjoint{<:Any, - <:AbstractSparseMatrixCSC}, - flux_is_symmetric::True, volume_flux, - orientation_or_normal_direction, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - AF_ij = 2 * A_ij * - volume_flux(u_i, u_j, orientation_or_normal_direction, - equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end - end - du[i] = du_i - end -end - -# Version for sparse operators and symmetric fluxes with curved meshes -@inline function hadamard_sum!(du, - A::LinearAlgebra.Adjoint{<:Any, - <:AbstractSparseMatrixCSC}, - flux_is_symmetric::True, volume_flux, - normal_directions::AbstractVector{<:AbstractVector}, - u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - - # provably entropy stable de-aliasing of geometric terms - normal_direction = 0.5 * (getindex.(normal_directions, i) + - getindex.(normal_directions, j)) - - AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end - end - du[i] = du_i - end -end - -# TODO: DGMulti. Fix for curved meshes. -# Version for sparse operators and non-symmetric fluxes -@inline function hadamard_sum!(du, - A::LinearAlgebra.Adjoint{<:Any, - <:AbstractSparseMatrixCSC}, - flux_is_symmetric::False, volume_flux, - normal_direction::AbstractVector, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - A_ij = vals[id] - j = rows[id] - u_j = u[j] - f_ij = volume_flux(u_i, u_j, normal_direction, equations) - du_i = du_i + 2 * A_ij * f_ij - end - du[i] = du_i - end -end - -# For DGMulti implementations, we construct "physical" differentiation operators by taking linear -# combinations of reference differentiation operators scaled by geometric change of variables terms. -# We use a lazy evaluation of physical differentiation operators, so that we can compute linear -# combinations of differentiation operators on-the-fly in an allocation-free manner. -@inline function build_lazy_physical_derivative(element, orientation, - mesh::DGMultiMesh{1}, dg, cache, - operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ = mesh.md - # ignore orientation - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1, element],)) -end - -@inline function build_lazy_physical_derivative(element, orientation, - mesh::DGMultiMesh{2}, dg, cache, - operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, ryJ, syJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, - operator_scaling .* - (rxJ[1, element], sxJ[1, element])) - else # if orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, - operator_scaling .* - (ryJ[1, element], syJ[1, element])) - end -end - -@inline function build_lazy_physical_derivative(element, orientation, - mesh::DGMultiMesh{3}, dg, cache, - operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, txJ, ryJ, syJ, tyJ, rzJ, szJ, tzJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, - operator_scaling .* - (rxJ[1, element], sxJ[1, element], - txJ[1, element])) - elseif orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, - operator_scaling .* - (ryJ[1, element], syJ[1, element], - tyJ[1, element])) - else # if orientation == 3 - return LazyMatrixLinearCombo(Qrst_skew, - operator_scaling .* - (rzJ[1, element], szJ[1, element], - tzJ[1, element])) - end -end - # Return the contravariant basis vector corresponding to the Cartesian # coordinate direction `orientation` in a given `element` of the `mesh`. # The contravariant basis vectors have entries `dx_i / dxhat_j` where @@ -260,6 +32,17 @@ end return SVector{NDIMS}(view.(dxidxhatj[:, orientation], :, element)) end +# For Affine meshes, `get_contravariant_vector` returns an SVector of scalars (constant over the +# element). The normal direction is the same for all node pairs. +@inline get_normal_direction(normal_directions::AbstractVector, i, j) = normal_directions + +# For NonAffine meshes, `get_contravariant_vector` returns an SVector of per-node arrays. +# We average the normals at nodes i and j for provably entropy-stable de-aliasing of geometric terms. +@inline function get_normal_direction(normal_directions::AbstractVector{<:AbstractVector}, + i, j) + return 0.5f0 * (getindex.(normal_directions, i) + getindex.(normal_directions, j)) +end + # use hybridized SBP operators for general flux differencing schemes. function compute_flux_differencing_SBP_matrices(dg::DGMulti) return compute_flux_differencing_SBP_matrices(dg, has_sparse_operators(dg)) @@ -288,6 +71,24 @@ function compute_flux_differencing_SBP_matrices(dg::DGMultiFluxDiffSBP, return Qrst_skew end +# Build element-to-element connectivity from face-to-face connectivity. +# Used for smoothing of shock capturing blending parameters (see `apply_smoothing!`). +function build_element_to_element_connectivity(mesh::DGMultiMesh, dg::DGMulti) + face_to_face_connectivity = mesh.md.FToF + element_to_element_connectivity = similar(face_to_face_connectivity) + for e in axes(face_to_face_connectivity, 2) + for f in axes(face_to_face_connectivity, 1) + neighbor_face_index = face_to_face_connectivity[f, e] + # Reverse-engineer element index from face index. Assumes all elements + # have the same number of faces. + neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + + 1 + element_to_element_connectivity[f, e] = neighbor_element_index + end + end + return element_to_element_connectivity +end + # For flux differencing SBP-type approximations, store solutions in Matrix{SVector{nvars}}. # This results in a slight speedup for `calc_volume_integral!`. function allocate_nested_array(uEltype, nvars, array_dimensions, dg::DGMultiFluxDiffSBP) @@ -306,15 +107,15 @@ function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffSBP, nvars = nvariables(equations) # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) - for _ in 1:Threads.maxthreadid()] + du_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) + for _ in 1:Threads.maxthreadid()] solution_container = initialize_dgmulti_solution_container(mesh, equations, dg, uEltype) return (; md, Qrst_skew, dxidxhatj = md.rstxyzJ, invJ = inv.(md.J), lift_scalings, inv_wq = inv.(rd.wq), - solution_container, fluxdiff_local_threaded) + solution_container, du_local_threaded) end # most general create_cache: works for `DGMultiFluxDiff{<:Polynomial}` @@ -353,10 +154,10 @@ function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff, RealT, for _ in 1:Threads.maxthreadid()] # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - # The result is then transferred to rhs_local_threaded::StructArray{<:SVector} before - # projecting it and storing it into `du`. - fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) - for _ in 1:Threads.maxthreadid()] + # The result is then transferred to `rhs_local`, a thread-local element of + # `rhs_local_threaded::StructArray{<:SVector}` before projecting it and storing it into `du`. + du_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) + for _ in 1:Threads.maxthreadid()] rhs_local_threaded = [allocate_nested_array(uEltype, nvars, (num_quad_points_total,), dg) for _ in 1:Threads.maxthreadid()] @@ -374,7 +175,7 @@ function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff, RealT, invJ = inv.(J), dxidxhatj = interpolated_geometric_terms, entropy_var_values, projected_entropy_var_values, entropy_projected_u_values, - solution_container, fluxdiff_local_threaded, rhs_local_threaded) + solution_container, du_local_threaded, rhs_local_threaded) end # TODO: DGMulti. Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. @@ -450,111 +251,170 @@ end function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, - volume_integral, dg::DGMultiFluxDiff, cache) + volume_integral, dg::DGMultiFluxDiff, cache, + alpha = true) # No interpolation performed for general volume integral. # Instead, an element-wise entropy projection (`entropy_projection!`) is performed before, see # `rhs!` for `DGMultiFluxDiff`, which populates `entropy_projected_u_values` @threaded for element in eachelement(mesh, dg, cache) volume_integral_kernel!(du, u, element, mesh, have_nonconservative_terms, equations, - volume_integral, dg, cache) + volume_integral, dg, cache, alpha) end return nothing end -# Computes flux differencing contribution from each Cartesian direction over a single element. -# For dense operators, we do not use sum factorization. -@inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, +# Computes flux differencing contribution over a single element by looping over node pairs (i, j). +# The physical normal direction for each pair is n_ij = geometric_matrix * ref_entries, +# where ref_entries[d] = Qrst_skew[d][i,j]. +# This fuses the NDIMS per-dimension flux +# evaluations of the old dimension-by-dimension loop into a single evaluation per pair. +# Essentially, instead of calculating +# volume_flux(u_i, u_j, 1, equations) * Qx[i, j] + volume_flux(u_i, u_j, 2, equations) * Qy[i, j] + ... +# where Qx[i, j] = dr/dx * Qr[i, j] + ds/dx * Qs[i, j], we can expand out and evaluate +# volume_flux(u_i, u_j, [dr/dx, dr/dy] * Qr[i, j], equations) + +# volume_flux(u_i, u_j, [ds/dx, ds/dy] * Qs[i, j], equations) +# which is slightly faster. +# +# For dense operators (SBP on Line/Tri/Tet), we do not use this sum factorization trick. +@inline function local_flux_differencing!(du_local, u_local, element_index, have_nonconservative_terms::False, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the volume flux is symmetric - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), volume_flux, - dim, u_local, equations) + @unpack Qrst_skew = cache + NDIMS = ndims(mesh) + row_ids = axes(first(Qrst_skew), 1) + geometric_matrix = get_contravariant_matrix(element_index, mesh, cache) + for i in row_ids + u_i = u_local[i] + for j in row_ids + # We use the symmetry of the volume flux and the anti-symmetry + # of the derivative operator to save half of the volume flux + # computations. + if j > i + u_j = u_local[j] + ref_entries = SVector(ntuple(d -> Qrst_skew[d][i, j], Val(NDIMS))) + normal_direction = geometric_matrix * ref_entries + AF_ij = 2 * volume_flux(u_i, u_j, normal_direction, equations) + du_local[i] = du_local[i] + AF_ij + du_local[j] = du_local[j] - AF_ij # Due to skew-symmetry + end + end end end -@inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, +@inline function local_flux_differencing!(du_local, u_local, element_index, have_nonconservative_terms::True, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) + @unpack Qrst_skew = cache + NDIMS = ndims(mesh) flux_conservative, flux_nonconservative = volume_flux - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the flux is symmetric. - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), flux_conservative, - dim, u_local, equations) - - # The final argument .5 scales the operator by 1/2 for the nonconservative terms. - half_Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, - cache, 0.5) - # False() indicates the flux is non-symmetric. - hadamard_sum!(fluxdiff_local, half_Qi_skew, - False(), flux_nonconservative, - dim, u_local, equations) + row_ids = axes(first(Qrst_skew), 1) + geometric_matrix = get_contravariant_matrix(element_index, mesh, cache) + for i in row_ids + u_i = u_local[i] + for j in row_ids + ref_entries = SVector(ntuple(d -> Qrst_skew[d][i, j], Val(NDIMS))) + normal_direction = geometric_matrix * ref_entries + # We use the symmetry of the volume flux and the anti-symmetry + # of the derivative operator to save half of the volume flux + # computations. + if j > i + u_j = u_local[j] + AF_ij = 2 * flux_conservative(u_i, u_j, normal_direction, equations) + du_local[i] = du_local[i] + AF_ij + du_local[j] = du_local[j] - AF_ij # Due to skew-symmetry + end + # Non-conservative terms use the full (non-symmetric) loop. + # The 0.5f0 factor on the normal direction is necessary for the nonconservative + # fluxes based on the interpretation of global SBP operators. + # See also `calc_interface_flux!` with `have_nonconservative_terms::True` + # in src/solvers/dgsem_tree/dg_1d.jl + f_nc = flux_nonconservative(u_i, u_local[j], 0.5f0 * normal_direction, + equations) + du_local[i] = du_local[i] + 2 * f_nc + end end end # When the operators are sparse, we use the sum-factorization approach to -# computing flux differencing. -@inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, +# computing flux differencing. Each dimension has its own sparse operator with +# its own sparsity pattern (e.g., tensor-product structure on Quad/Hex elements), +# so we loop per-dimension. For each nonzero entry A[i,j] we evaluate the flux once +# and exploit skew-symmetry to accumulate both the (i,j) and (j,i) contributions. +@inline function local_flux_differencing!(du_local, u_local, element_index, have_nonconservative_terms::False, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) @unpack Qrst_skew = cache for dim in eachdim(mesh) - # There are two ways to write this flux differencing discretization on affine meshes. - # - # 1. Use numerical fluxes in Cartesian directions and sum up the discrete derivative - # operators per coordinate direction accordingly. - # 2. Use discrete derivative operators per coordinate direction and corresponding - # numerical fluxes in arbitrary (non-Cartesian) space directions. - # - # The first option makes it necessary to sum up the individual sparsity - # patterns of each reference coordinate direction. On tensor-product - # elements such as `Quad()` or `Hex()` elements, this increases the number of - # potentially expensive numerical flux evaluations by a factor of `ndims(mesh)`. - # Thus, we use the second option below (which basically corresponds to the - # well-known sum factorization on tensor product elements). - # Note that there is basically no difference for dense derivative operators. - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + normal_directions = get_contravariant_vector(element_index, dim, mesh, cache) Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), volume_flux, - normal_direction, u_local, equations) + A_base, row_ids, rows, vals = sparse_operator_data(Q_skew) + for i in row_ids + u_i = u_local[i] + du_i = du_local[i] + for id in nzrange(A_base, i) + j = rows[id] + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). + if j > i + u_j = u_local[j] + A_ij = vals[id] + normal_direction_ij = get_normal_direction(normal_directions, i, j) + AF_ij = 2 * A_ij * + volume_flux(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + AF_ij + du_local[j] = du_local[j] - AF_ij # Due to skew-symmetry + end + end + du_local[i] = du_i + end end end -@inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, +@inline function local_flux_differencing!(du_local, u_local, element_index, have_nonconservative_terms::True, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) @unpack Qrst_skew = cache flux_conservative, flux_nonconservative = volume_flux for dim in eachdim(mesh) - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + normal_directions = get_contravariant_vector(element_index, dim, mesh, cache) Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), flux_conservative, - normal_direction, u_local, equations) - - # We scale the operator by 1/2 for the nonconservative terms. - half_Q_skew = LazyMatrixLinearCombo((Q_skew,), (0.5,)) - # False() indicates the flux is non-symmetric - hadamard_sum!(fluxdiff_local, half_Q_skew, - False(), flux_nonconservative, - normal_direction, u_local, equations) + A_base, row_ids, rows, vals = sparse_operator_data(Q_skew) + for i in row_ids + u_i = u_local[i] + du_i = du_local[i] + for id in nzrange(A_base, i) + j = rows[id] + A_ij = vals[id] + u_j = u_local[j] + normal_direction_ij = get_normal_direction(normal_directions, i, j) + # Conservative part: exploit skew-symmetry (calculate upper triangular part only). + if j > i + AF_ij = 2 * A_ij * + flux_conservative(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + AF_ij + du_local[j] = du_local[j] - AF_ij # Due to skew-symmetry + end + # Non-conservative terms use the full (non-symmetric) loop. + # The 0.5f0 factor on the normal direction is necessary for the nonconservative + # fluxes based on the interpretation of global SBP operators. + # See also `calc_interface_flux!` with `have_nonconservative_terms::True` + # in src/solvers/dgsem_tree/dg_1d.jl + f_nc = flux_nonconservative(u_i, u_j, 0.5f0 * normal_direction_ij, + equations) + du_i = du_i + 2 * A_ij * f_nc + end + du_local[i] = du_i + end end end @@ -564,27 +424,27 @@ end @inline function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, - dg::DGMultiFluxDiff, cache) + dg::DGMultiFluxDiff, cache, alpha = true) @unpack entropy_projected_u_values, Ph = cache - @unpack fluxdiff_local_threaded, rhs_local_threaded = cache + @unpack du_local_threaded, rhs_local_threaded = cache - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + du_local = du_local_threaded[Threads.threadid()] + fill!(du_local, zero(eltype(du_local))) u_local = view(entropy_projected_u_values, :, element) - local_flux_differencing!(fluxdiff_local, u_local, element, + local_flux_differencing!(du_local, u_local, element, have_nonconservative_terms, volume_integral.volume_flux, has_sparse_operators(dg), mesh, equations, dg, cache) - # convert fluxdiff_local::Vector{<:SVector} to StructArray{<:SVector} for faster + # convert du_local::Vector{<:SVector} to StructArray{<:SVector} for faster # apply_to_each_field performance. rhs_local = rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] + for i in Base.OneTo(length(du_local)) + rhs_local[i] = du_local[i] end - apply_to_each_field(mul_by_accum!(Ph), view(du, :, element), rhs_local) + apply_to_each_field(mul_by_accum!(Ph, alpha), view(du, :, element), rhs_local) return nothing end @@ -592,21 +452,22 @@ end @inline function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, - dg::DGMultiFluxDiffSBP, cache) - @unpack fluxdiff_local_threaded, inv_wq = cache + dg::DGMultiFluxDiffSBP, cache, + alpha = true) + @unpack du_local_threaded, inv_wq = cache - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + du_local = du_local_threaded[Threads.threadid()] + fill!(du_local, zero(eltype(du_local))) u_local = view(u, :, element) - local_flux_differencing!(fluxdiff_local, u_local, element, + local_flux_differencing!(du_local, u_local, element, have_nonconservative_terms, volume_integral.volume_flux, has_sparse_operators(dg), mesh, equations, dg, cache) for i in each_quad_node(mesh, dg, cache) - du[i, element] = du[i, element] + fluxdiff_local[i] * inv_wq[i] + du[i, element] = du[i, element] + alpha * du_local[i] * inv_wq[i] end return nothing diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 48d75938d86..1b8a257d0a2 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -490,7 +490,7 @@ function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, return nothing end -# Assumes cache.flux_face_values is already computed. +# Assumes cache.solution_container.flux_face_values is already computed. # Enables tensor product evaluation of `LIFT isa TensorProductGaussFaceOperator`. function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, @@ -512,29 +512,6 @@ function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, - have_nonconservative_terms, equations, - volume_flux, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha = true) - fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(cache.entropy_projected_u_values, :, element) - - local_flux_differencing!(fluxdiff_local, u_local, element, - have_nonconservative_terms, - volume_flux, has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` - # for faster performance when using `apply_to_each_field`. - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] - end - - return project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) -end - function project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh::DGMultiMesh, dg::DGMulti, cache, alpha = true) @@ -564,14 +541,26 @@ end function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, - dg::DGMultiFluxDiff{<:GaussSBP}, cache) + dg::DGMultiFluxDiff{<:GaussSBP}, cache, alpha = true) (; volume_flux) = volume_integral - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux, dg, cache) + du_local = cache.du_local_threaded[Threads.threadid()] + fill!(du_local, zero(eltype(du_local))) + u_local = view(cache.entropy_projected_u_values, :, element) - return nothing + local_flux_differencing!(du_local, u_local, element, + have_nonconservative_terms, + volume_flux, has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert `du_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` + # for faster performance when using `apply_to_each_field`. + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(du_local)) + rhs_local[i] = du_local[i] + end + + return project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end # interpolate back to Lobatto nodes after applying the inverse Jacobian at Gauss points diff --git a/src/solvers/dgmulti/sbp.jl b/src/solvers/dgmulti/sbp.jl index bb0aea56b28..2d75b18049f 100644 --- a/src/solvers/dgmulti/sbp.jl +++ b/src/solvers/dgmulti/sbp.jl @@ -207,11 +207,7 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, # This would have to be changed if `have_nonconservative_terms = False()` # because then `volume_flux` is non-symmetric. A = dg.basis.Drst[dim] - - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) + A_base, row_ids, rows, vals = sparse_operator_data(A) @threaded for i in row_ids u_i = u[i] @@ -233,19 +229,35 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, else # if using two threads or fewer - # Calls `hadamard_sum!``, which uses symmetry to reduce flux evaluations. Symmetry - # is expected to yield about a 2x speedup, so we default to the symmetry-exploiting - # volume integral unless we have >2 threads (which should yield >2 speedup). + # Exploit skew-symmetry to halve the number of flux evaluations (≈2x speedup). + # A = Drst[dim] is skew-symmetric for periodic FD-SBP on uniform grids, so + # A[i,j] = -A[j,i]. The stored CSC value vals[id] = A[j,i] = -A[i,j], hence + # we use -vals[id] to recover A[i,j], matching the multithreaded branch above. for dim in eachdim(mesh) normal_direction = get_contravariant_vector(1, dim, mesh, cache) A = dg.basis.Drst[dim] + A_base, row_ids, rows, vals = sparse_operator_data(A) - # since have_nonconservative_terms::False, - # the volume flux is symmetric. - flux_is_symmetric = True() - hadamard_sum!(du, A, flux_is_symmetric, volume_flux, - normal_direction, u, equations) + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + j = rows[id] + # We use the symmetry of the volume flux and the anti-symmetry + # of the derivative operator to save half of the volume flux + # computations. + if j > i + A_ij = -vals[id] # A[j,i] stored; skew-symmetry: -A[j,i] = A[i,j] + u_j = u[j] + AF_ij = 2 * A_ij * + volume_flux(u_i, u_j, normal_direction, equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij # Due to skew-symmetry + end + end + du[i] = du_i + end end end diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl index d1c9cc3e9d2..b018c7f5894 100644 --- a/src/solvers/dgmulti/shock_capturing.jl +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -7,20 +7,7 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations, @assert volume_integral_blend_high_order isa VolumeIntegralFluxDifferencing "DGMulti is currently only compatible with `VolumeIntegralFluxDifferencing` as `volume_integral_blend_high_order`" # `volume_integral_blend_low_order` limited to finite-volume on Gauss-node subcells - # build element to element (element_to_element_connectivity) connectivity for smoothing of - # shock capturing parameters. - face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix - element_to_element_connectivity = similar(face_to_face_connectivity) - for e in axes(face_to_face_connectivity, 2) - for f in axes(face_to_face_connectivity, 1) - neighbor_face_index = face_to_face_connectivity[f, e] - - # reverse-engineer element index from face. Assumes all elements - # have the same number of faces. - neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 - element_to_element_connectivity[f, e] = neighbor_element_index - end - end + element_to_element_connectivity = build_element_to_element_connectivity(mesh, dg) # create sparse hybridized operators for low order scheme Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) @@ -196,10 +183,10 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, dg, cache, 1 - alpha_element) # Calculate "FV" low order volume integral contribution - low_order_flux_differencing_kernel(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_integral_blend_low_order, - dg, cache, alpha_element) + volume_integral_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_integral_blend_low_order, + dg, cache, alpha_element) end end @@ -261,57 +248,30 @@ function get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) get_contravariant_matrix(j, element, mesh, cache)) end -# computes an algebraic low order method with internal dissipation. -# This method is for affine/Cartesian meshes -function low_order_flux_differencing_kernel(du, u, element, - mesh::DGMultiMesh, - have_nonconservative_terms::False, equations, - volume_integral, - dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha = true) - (; volume_flux_fv) = volume_integral - - # accumulates output from flux differencing - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - fill!(rhs_local, zero(eltype(rhs_local))) - - u_local = view(cache.entropy_projected_u_values, :, element) - - # constant over each element - geometric_matrix = get_contravariant_matrix(element, mesh, cache) - - (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) - for i in row_ids - u_i = u_local[i] - du_i = zero(u_i) - for id in nzrange(A_base, i) - j = rows[id] - u_j = u_local[j] - - # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) - normal_direction_ij = geometric_matrix * reference_operator_entries - - # note that we do not need to normalize `normal_direction_ij` since - # it is typically normalized within the flux computation. - f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) - du_i = du_i + 2 * f_ij - end - rhs_local[i] = du_i - end +# On affine meshes, the geometric matrix is constant over the element, so we compute it +# once and reuse it for all node pairs (i, j). The compiler is expected to hoist this +# out of the inner loop after inlining. +@inline function get_low_order_geometric_matrix(i, j, element, + mesh::DGMultiMesh{NDIMS, <:Affine}, + cache) where {NDIMS} + return get_contravariant_matrix(element, mesh, cache) +end - # TODO: factor this out to avoid calling it twice during calc_volume_integral! - return project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) +# On non-affine meshes, we use the average of the geometric matrices at nodes i and j +# for provably entropy-stable de-aliasing of the geometric terms. +@inline function get_low_order_geometric_matrix(i, j, element, + mesh::DGMultiMesh, + cache) + return get_avg_contravariant_matrix(i, j, element, mesh, cache) end -function low_order_flux_differencing_kernel(du, u, element, - mesh::DGMultiMesh{NDIMS, <:NonAffine}, - have_nonconservative_terms::False, equations, - volume_integral, - dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha = true) where {NDIMS} +# Calculates the volume integral corresponding to an algebraic low order method. +# This is used, for example, in shock capturing. +function volume_integral_kernel!(du, u, element, mesh::DGMultiMesh, + have_nonconservative_terms::False, equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, + dg::DGMultiFluxDiff{<:GaussSBP}, cache, + alpha = true) (; volume_flux_fv) = volume_integral # accumulates output from flux differencing @@ -321,8 +281,7 @@ function low_order_flux_differencing_kernel(du, u, element, u_local = view(cache.entropy_projected_u_values, :, element) (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + A_base, row_ids, rows, _ = sparse_operator_data(sparsity_pattern) for i in row_ids u_i = u_local[i] du_i = zero(u_i) @@ -331,7 +290,7 @@ function low_order_flux_differencing_kernel(du, u, element, u_j = u_local[j] # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) + geometric_matrix = get_low_order_geometric_matrix(i, j, element, mesh, cache) reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) normal_direction_ij = geometric_matrix * reference_operator_entries diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index 627ef809dfe..1b408ad57f6 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -341,30 +341,6 @@ function DGMultiMesh(dg::DGMulti{NDIMS}, filename::String; return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md, boundary_faces) end -# Matrix type for lazy construction of physical differentiation matrices -# Constructs a lazy linear combination of B = ∑_i coeffs[i] * A[i] -struct LazyMatrixLinearCombo{Tcoeffs, N, Tv, TA <: AbstractMatrix{Tv}} <: - AbstractMatrix{Tv} - matrices::NTuple{N, TA} - coeffs::NTuple{N, Tcoeffs} - function LazyMatrixLinearCombo(matrices, coeffs) - @assert all(matrix -> size(matrix) == size(first(matrices)), matrices) - return new{typeof(first(coeffs)), length(matrices), eltype(first(matrices)), - typeof(first(matrices))}(matrices, coeffs) - end -end -Base.eltype(A::LazyMatrixLinearCombo) = eltype(first(A.matrices)) -Base.IndexStyle(A::LazyMatrixLinearCombo) = IndexCartesian() -Base.size(A::LazyMatrixLinearCombo) = size(first(A.matrices)) - -@inline function Base.getindex(A::LazyMatrixLinearCombo{<:Real, N}, i, j) where {N} - val = zero(eltype(A)) - for k in Base.OneTo(N) - val = val + A.coeffs[k] * getindex(A.matrices[k], i, j) - end - return val -end - # `SimpleKronecker` lazily stores a Kronecker product `kron(ntuple(A, NDIMS)...)`. # This object also allocates some temporary storage to enable the fast computation # of matrix-vector products.