diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 344b8eacc3a..77dbdd5ea06 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -19,3 +19,23 @@ steps: timeout_in_minutes: 60 soft_fail: - exit_status: 3 + + - label: "Metal Julia {{matrix.version}}" + matrix: + setup: + version: + - "1.10" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.version}}" + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "Metal" + agents: + queue: "juliaecosystem" + os: "macos" + arch: "aarch64" + if: build.message !~ /\[skip ci\]/ + timeout_in_minutes: 60 + soft_fail: + - exit_status: 3 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f8e4a3f2e4..ff47696c58e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,6 +85,7 @@ jobs: - performance_specializations - mpi - threaded + - kernelabstractions include: - version: '1.11' os: ubuntu-latest diff --git a/Project.toml b/Project.toml index ee7b35463cb..947aebf931b 100644 --- a/Project.toml +++ b/Project.toml @@ -58,6 +58,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" @@ -65,6 +66,7 @@ SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" TrixiCUDAExt = "CUDA" TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" +TrixiMetalExt = "Metal" TrixiNLsolveExt = "NLsolve" TrixiSparseConnectivityTracerExt = "SparseConnectivityTracer" @@ -90,6 +92,7 @@ LinearAlgebra = "1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.171" MPI = "0.20.22" +Metal = "1.9.2" Makie = "0.21, 0.22, 0.23, 0.24" MuladdMacro = "0.2.4" NLsolve = "4.5.1" diff --git a/benchmark/CUDA/Project.toml b/benchmark/CUDA/Project.toml new file mode 100644 index 
00000000000..221c03a5947 --- /dev/null +++ b/benchmark/CUDA/Project.toml @@ -0,0 +1,6 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +OrdinaryDiffEqLowStorageRK = "b0944070-b475-4768-8dec-fb6eb410534d" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Trixi = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..de491a3761b --- /dev/null +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,79 @@ +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +function initial_condition_taylor_green_vortex(x, t, + equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + + 1.0 / 16.0 * A^2 * rho * + (cos(2 * x[1]) * cos(2 * x[3]) + + 2 * cos(2 * x[2]) + 2 * cos(2 * x[1]) + cos(2 * x[2]) * cos(2 * x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +initial_condition = initial_condition_taylor_green_vortex + +# TODO Undefined external symbol "log" +#volume_flux = flux_ranocha +volume_flux = flux_lax_friedrichs +solver = DGSEM(polydeg = 5, surface_flux = volume_flux) +# TODO flux diff +#volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = (1.0, 1.0, 1.0) .* pi + +initial_refinement_level = 1 +trees_per_dimension = (4, 4, 4) + +mesh = P4estMesh(trees_per_dimension, polydeg = 1, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + 
periodicity = true, initial_refinement_level = initial_refinement_level) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 100.0) +ode = semidiscretize(semi, tspan; storage_type = nothing, real_type = nothing) + +summary_callback = SummaryCallback() + +stepsize_callback = StepsizeCallback(cfl = 0.1) + +callbacks = CallbackSet(summary_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +maxiters = 200 +run_profiler = false + +# disable warnings when maxiters is reached +integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, + save_everystep = false, callback = callbacks, + maxiters = maxiters, verbose = false) +if run_profiler + prof_result = CUDA.@profile solve!(integrator) +else + solve!(integrator) + prof_result = nothing +end + +finalize(mesh) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl new file mode 100644 index 00000000000..70c840722af --- /dev/null +++ b/benchmark/CUDA/run.jl @@ -0,0 +1,89 @@ +using Trixi +using CUDA +using TimerOutputs +using JSON + +function main(elixir_path) + + # setup + maxiters = 50 + initial_refinement_level = 3 + storage_type = CuArray + real_type = Float64 + + println("Warming up...") + + # start simulation with tiny final time to trigger compilation + duration_compile = @elapsed begin + trixi_include(elixir_path, + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = real_type) + trixi_include(elixir_path, + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = Float32) + end + + println("Finished warm-up in $duration_compile seconds\n") + println("Starting simulation...") + + # start the real simulation + duration_elixir = @elapsed trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = 
initial_refinement_level, + storage_type = storage_type, + real_type = real_type) + + # store metrics (on every rank!) + metrics = Dict{String, Float64}("elapsed time" => duration_elixir) + + # read TimerOutputs timings + timer = Trixi.timer() + metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer) + metrics["rhs! time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"]) + + # compute performance index + nrhscalls = Trixi.ncalls(semi.performance_counter) + walltime = 1.0e-9 * take!(semi.performance_counter) + metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls) + + # write json file + open("metrics.out", "w") do f + indent = 2 + JSON.print(f, metrics, indent) + end + + # run profiler + maxiters = 5 + initial_refinement_level = 1 + + println("Running profiler (Float64)...") + trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float64, + run_profiler = true) + + open("profile_float64.txt", "w") do io + show(io, prof_result) + end + + println("Running profiler (Float32)...") + trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float32, + run_profiler = true) + + open("profile_float32.txt", "w") do io + show(io, prof_result) + end +end + +# hardcoded elixir +elixir_path = joinpath(@__DIR__(), "elixir_euler_taylor_green_vortex.jl") + +main(elixir_path) diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md index 9d4dc50c181..70d40dd2f6d 100644 --- a/docs/src/heterogeneous.md +++ b/docs/src/heterogeneous.md @@ -120,9 +120,14 @@ function trixi_rhs_fct(mesh, equations, solver, cache, args) end ``` -1. Put the inner code in a new function `rhs_fct_per_element`. Besides the index - `element`, pass all required fields as arguments, but make sure to `@unpack` them from - their structs in advance. +1. 
Move the inner code into a new inlined function `rhs_fct_per_element`. + ```julia + @inline function rhs_fct_per_element(..., element, ...) + ... + end + ``` + Besides the index `element`, pass all required fields as arguments, but make sure to + `@unpack` them from their structs in advance. 2. Where `trixi_rhs_fct` is called, get the backend, i.e., the hardware we are currently running on via `trixi_backend(x)`. This will, e.g., work with `u_ode`. Internally, KernelAbstractions.jl's `get_backend` diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 4553d4823ed..9e1fc0dba8e 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -50,9 +50,8 @@ save_solution = SaveSolutionCallback(interval = 100, stepsize_callback = StepsizeCallback(cfl = 1.6) # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver -callbacks = CallbackSet(summary_callback, stepsize_callback) -# TODO: GPU. 
The `analysis_callback` needs to be updated for GPU support -# analysis_callback, save_solution, stepsize_callback) +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) ############################################################################### # run the simulation diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..c0161c2683a --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,62 @@ +# The same setup as tree_3d_dgsem/elixir_advection_basic.jl +# to verify the P4estMesh implementation against TreeMesh + +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) +coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) + +# Create P4estMesh with 8 x 8 x 8 elements (note `refinement_level=1`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1, + periodicity = true) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver; + boundary_conditions = boundary_condition_periodic) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.2) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 0.05, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl index 681d2f53a1e..3326c536a76 100644 --- a/ext/TrixiCUDAExt.jl +++ b/ext/TrixiCUDAExt.jl @@ -1,11 +1,19 @@ # Package extension for adding CUDA-based features to Trixi.jl module TrixiCUDAExt -import CUDA: CuArray +import CUDA: CuArray, CuDeviceArray, KernelAdaptor import Trixi function Trixi.storage_type(::Type{<:CuArray}) return CuArray end +function Trixi.unsafe_wrap_or_alloc(::KernelAdaptor, vec, size) + return Trixi.unsafe_wrap_or_alloc(CuDeviceArray, vec, size) +end + +function 
Trixi.unsafe_wrap_or_alloc(::Type{<:CuDeviceArray}, vec::CuDeviceArray, size) + return reshape(vec, size) +end + end diff --git a/ext/TrixiMetalExt.jl b/ext/TrixiMetalExt.jl new file mode 100644 index 00000000000..b1d2be8b926 --- /dev/null +++ b/ext/TrixiMetalExt.jl @@ -0,0 +1,19 @@ +# Package extension for adding Metal-based features to Trixi.jl +module TrixiMetalExt + +import Metal: MtlArray, MtlDeviceArray, Adaptor +import Trixi + +function Trixi.storage_type(::Type{<:MtlArray}) + return MtlArray +end + +function Trixi.unsafe_wrap_or_alloc(::Adaptor, vec, size) + return Trixi.unsafe_wrap_or_alloc(MtlDeviceArray, vec, size) +end + +function Trixi.unsafe_wrap_or_alloc(::Type{<:MtlDeviceArray}, vec::MtlDeviceArray, size) + return reshape(vec, size) +end + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index 7707b8434bc..6394bd34da2 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,7 +59,8 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace -using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend +using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend, + allocate using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl index e53df1dd4c3..83bd9746848 100644 --- a/src/callbacks_step/analysis_dg1d.jl +++ b/src/callbacks_step/analysis_dg1d.jl @@ -124,7 +124,8 @@ end # This avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{1}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{1}}, equations, dg::DGSEM, + cache, args...) 
where {Func} @unpack weights = dg.basis @@ -142,9 +143,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{1}, + meshT::Type{<:AbstractMesh{1}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, element) du_node = get_node_vars(du, equations, dg, i, element) @@ -155,7 +156,8 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, equations, dg::DGSEM, cache, args...) where {Func} u_left = get_node_vars(u, equations, dg, 1, element) diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 5035e831eed..96e640f8d62 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -138,7 +138,7 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, @@ -146,9 +146,19 @@ function calc_error_norms(func, u, t, analyzer, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # 
TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -190,7 +200,8 @@ end # This avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{2}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{2}}, equations, dg::DGSEM, + cache, args...) where {Func} @unpack weights = dg.basis @@ -208,9 +219,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{2}, + meshT::Type{<:AbstractMesh{2}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, j, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, element) du_node = get_node_vars(du, equations, dg, i, j, element) @@ -221,7 +232,7 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::TreeMesh{2}, equations, dg::DGSEM, + ::Type{<:TreeMesh{2}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack weights = dg.basis @@ -250,11 +261,11 @@ end # Note: `get_normal_direction` already returns an outward-pointing normal for all directions, # thus no +- flips are needed here. 
function surface_integral_reference_element(func::Func, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, - T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack contravariant_vectors = cache.elements @@ -327,13 +338,23 @@ function integrate_via_indices(func::Func, u, return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack weights = dg.basis + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, equations, dg, args...)) @@ -343,7 +364,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + volume_jacobian = abs(inv(inverse_jacobian[i, j, element])) integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) 
total_volume += volume_jacobian * weights[i] * weights[j] @@ -388,10 +409,18 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::Union{DGSEM, FDSBP}, cache) + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, element, equations, dg, du diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 634db48de29..2578fa7174a 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -161,14 +161,24 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -214,7 +224,8 @@ # This 
avoids the need to divide the RHS of the DG scheme by the Jacobian when computing # the time derivative of entropy, see `entropy_change_reference_element`. function integrate_reference_element(func::Func, u, element, - mesh::AbstractMesh{3}, equations, dg::DGSEM, cache, + ::Type{<:AbstractMesh{3}}, equations, dg::DGSEM, + cache, args...; normalize = true) where {Func} @unpack weights = dg.basis @@ -232,9 +243,9 @@ end # Calculate ∫_e (∂S/∂u ⋅ ∂u/∂t) dΩ_e where the result on element 'e' is kept in reference space # Note that ∂S/∂u = w(u) with entropy variables w function entropy_change_reference_element(du, u, element, - mesh::AbstractMesh{3}, + meshT::Type{<:AbstractMesh{3}}, equations, dg::DGSEM, cache, args...) - return integrate_reference_element(u, element, mesh, equations, dg, cache, + return integrate_reference_element(u, element, meshT, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du u_node = get_node_vars(u, equations, dg, i, j, k, element) du_node = get_node_vars(du, equations, dg, i, j, k, element) @@ -245,7 +256,7 @@ end # calculate surface integral of func(u, equations) * normal on the reference element. function surface_integral_reference_element(func::Func, u, element, - mesh::TreeMesh{3}, equations, dg::DGSEM, + ::Type{<:TreeMesh{3}}, equations, dg::DGSEM, cache, args...) where {Func} @unpack weights = dg.basis @@ -281,8 +292,9 @@ end # Note: `get_normal_direction` already returns an outward-pointing normal for all directions, # thus no +- flips are needed here. function surface_integral_reference_element(func::Func, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, equations, dg::DGSEM, cache, args...) 
where {Func} @unpack contravariant_vectors = cache.elements @@ -377,12 +389,22 @@ function integrate_via_indices(func::Func, u, return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack weights = dg.basis + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) @@ -392,7 +414,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + volume_jacobian = abs(inv(inverse_jacobian[i, j, k, element])) integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) 
total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] @@ -438,10 +460,18 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::Union{DGSEM, FDSBP}, cache) + # TODO GPU AnalysisCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 6e002d2eb23..68110a3b257 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -287,6 +287,11 @@ end element_variables = Dict{Symbol, Any}(), node_variables = Dict{Symbol, Any}(); system = "") + # TODO GPU currently on CPU + backend = trixi_backend(u_ode) + if backend !== nothing + u_ode = Array(u_ode) + end mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array_native(u_ode, mesh, equations, solver, cache) save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index febda8252b7..c3d8238ed0a 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -143,8 +143,9 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - return cfl_advective(t) * max_dt(u, t, mesh, + return cfl_advective(t) * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -154,8 +155,9 @@ function calculate_dt(u_ode, t, 
cfl_advective::Real, cfl_diffusive::Real, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - return cfl_advective * max_dt(u, t, mesh, + return cfl_advective * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -167,14 +169,15 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, equations_parabolic = semi.equations_parabolic u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - dt_advective = cfl_advective(t) * max_dt(u, t, mesh, + dt_advective = cfl_advective(t) * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) cfl_diff = cfl_diffusive(t) if cfl_diff > 0 # Check if diffusive CFL should be considered - dt_diffusive = cfl_diff * max_dt(u, t, mesh, + dt_diffusive = cfl_diff * max_dt(backend, u, t, mesh, have_constant_diffusivity(equations_parabolic), equations, equations_parabolic, solver, cache) diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index 8a029543575..f445fc79c88 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -5,7 +5,7 @@ @muladd begin #! 
format: noindent -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere @@ -28,7 +28,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -53,7 +53,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, @@ -73,7 +73,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh, # for all dimensions +function max_dt(backend::Nothing, u, t, mesh::TreeMesh, # for all dimensions constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -95,7 +95,7 @@ function max_dt(u, t, mesh::TreeMesh, # for all dimensions return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere @@ -122,7 +122,7 @@ function max_dt(u, t, mesh::StructuredMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 
db1565d9a16..35374ed7028 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -29,7 +29,7 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -53,7 +53,7 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -72,34 +72,34 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::TreeMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] @@ -110,45 +110,111 @@ end # Thus, there is no `max_dt` function for `TreeMeshParallel{2}` and # `equations_parabolic::AbstractEquationsParabolic` implemented. -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - lambda1, lambda2 = max_abs_speeds(u_node, equations) + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, + equations, dg, contravariant_vectors, + inverse_jacobian, element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + end + return 2 / (nnodes(dg) * max_scaled_speed) +end - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? 
(time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - inv_jacobian = abs(inverse_jacobian[i, j, element]) +# works for both constant and non-constant speed +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed, equations, + dg::DG, contravariant_vectors, + inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_per_element(u, mT, constant_speed, + equations, dg, + contravariant_vectors, + inverse_jacobian, + element) +end - max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) - max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) - end +@inline function max_scaled_speed_per_element(u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_lambda1 = max_lambda2 = zero(eltype(u)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) + + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + + max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) + max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) + end + return max_lambda1 + max_lambda2 +end + +function max_dt(backend::Nothing, u, t, + 
mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::True, equations, dg::DG, cache) + max_scaled_speed = nextfloat(zero(t)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) + max_scaled_speed_loc = max_scaled_speed_per_element(u, typeof(mesh), + constant_speed, + equations, dg, + contravariant_vectors, + inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda1 + max_lambda2) + max_scaled_speed = Base.max(max_scaled_speed, max_scaled_speed_loc) end return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -194,41 +260,54 @@ function max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, +function max_dt(backend::Backend, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? 
(time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - # Avoid division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - +function max_scaled_speed_per_element(u, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_scaled_speed = zero(eltype(u)) max_lambda1, max_lambda2 = max_abs_speeds(equations) + for j in eachnode(dg), i in eachnode(dg) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) + inv_jacobian = abs(inverse_jacobian[i, j, element]) - inv_jacobian = abs(inverse_jacobian[i, j, element]) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - inv_jacobian * - (lambda1_transformed + lambda2_transformed)) - end + max_scaled_speed = Base.max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed)) end - return 2 / (nnodes(dg) * max_scaled_speed) + return max_scaled_speed end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{2}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, @@ -273,68 +352,68 @@ function max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::P4estMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::P4estMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. 
# Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::T8codeMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::T8codeMeshParallel{2}, +function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index d310be1675e..55645aa92b2 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -31,7 +31,7 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -56,7 +56,7 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -76,7 +76,8 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +function max_dt(backend::Nothing, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::False, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -85,38 +86,75 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + max_lambda = max_scaled_speed_element(u, typeof(mesh), equations, dg, + contravariant_vectors, inverse_jacobian, + element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + end - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, - i, j, k, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, - i, j, k, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, - i, j, k, element) - lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + return 2 / (nnodes(dg) * max_scaled_speed) +end - inv_jacobian = abs(inverse_jacobian[i, j, k, element]) +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) - max_lambda1 = Base.max(max_lambda1, inv_jacobian * lambda1_transformed) - max_lambda2 = Base.max(max_lambda2, inv_jacobian * lambda2_transformed) - max_lambda3 = Base.max(max_lambda3, inv_jacobian * lambda3_transformed) - end + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), equations, dg, contravariant_vectors, + inverse_jacobian; + ndrange = num_elements) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - max_lambda1 + max_lambda2 + max_lambda3) - end + # TODO GPU dt on CPU? 
(time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, equations, + dg, contravariant_vectors, inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, equations, dg, + contravariant_vectors, + inverse_jacobian, + element) +end + +@inline function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, equations, dg, + contravariant_vectors, inverse_jacobian, + element) + max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, + i, j, k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, + i, j, k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, + i, j, k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + return max_lambda1 + max_lambda2 + max_lambda3 +end + +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, @@ -168,13 +206,19 @@ function 
max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +function max_dt(backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::True, equations, dg::DG, cache) # Avoid division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) @unpack contravariant_vectors, inverse_jacobian = cache.elements + if backend !== nothing + # TODO: Port to GPU + contravariant_vectors = Array(cache.elements.contravariant_vectors) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + end max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) @@ -207,7 +251,7 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::P4estMesh{3}, # Parabolic terms currently only for `TreeMesh` and `P4estMesh` constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, @@ -258,68 +302,68 @@ function max_dt(u, t, return 4 / (nnodes(dg) * max_scaled_diffusivity) end -function max_dt(u, t, mesh::P4estMeshParallel{3}, +function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::P4estMeshParallel{3}, +function max_dt(backend::Nothing, u, t, mesh::P4estMeshParallel{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::T8codeMeshParallel{3}, +function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::T8codeMeshParallel{3}, +function max_dt(backend::Nothing, u, t, mesh::T8codeMeshParallel{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 661fcdfd5ad..161af1b079b 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -301,6 +301,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) u_euler = wrap_array(u_ode, semi_euler) u_gravity = wrap_array(cache.u_ode, semi_gravity) du_gravity = wrap_array(cache.du_ode, semi_gravity) + backend = trixi_backend(u_ode) # set up main loop finalstep = false @@ -312,7 +313,7 @@ function 
update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) @unpack equations = semi_gravity while !finalstep dtau = @trixi_timeit timer() "calculate dtau" begin - cfl * max_dt(u_gravity, tau, semi_gravity.mesh, + cfl * max_dt(backend, u_gravity, tau, semi_gravity.mesh, have_constant_speed(equations), equations, semi_gravity.solver, semi_gravity.cache) end diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index cd20fbf8e3f..17e4ad4ef65 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -577,10 +577,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index c730439017c..cab2626783c 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -369,10 +369,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" 
rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter.counters[1], runtime) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 8a11429766a..978ee5cfe39 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1056,6 +1056,13 @@ end return u_ll, u_rr end +# As above but dispatches on an type argument +@inline function get_surface_node_vars(u, equations, ::Type{<:DG}, indices...) + u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations)))) + return u_ll, u_rr +end + @inline function set_node_vars!(u, u_node, equations, solver::DG, indices...) for v in eachvariable(equations) u[v, indices...] = u_node[v] @@ -1218,55 +1225,46 @@ function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{ return nothing end -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Nothing, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) @unpack node_coordinates = cache.elements + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) @threaded for element in eachelement(dg, cache) compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, - element) + element, node_indices) end return nothing end -function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Backend, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) nelements(dg, cache) == 0 && return nothing + @unpack node_coordinates = cache.elements - kernel! = compute_coefficients_kernel!(backend) - kernel!(u, func, t, equations, dg, node_coordinates, + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) + + kernel! 
= compute_coefficients_KAkernel!(backend) + kernel!(u, func, t, equations, dg, node_coordinates, node_indices, ndrange = nelements(dg, cache)) return nothing end -@kernel function compute_coefficients_kernel!(u, func, t, equations, - dg::DG, node_coordinates) +@kernel function compute_coefficients_KAkernel!(u, func, t, equations, + dg::DG, node_coordinates, node_indices) element = @index(Global) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, - element) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element, + node_indices) end -function compute_coefficients_element!(u, func, t, equations, dg::DG, - node_coordinates, element) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(node_coordinates, equations, dg, i, - j, element) +@inline function compute_coefficients_element!(u, func, t, equations, dg::DG, + node_coordinates, element, node_indices) + for indices in node_indices + x_node = get_node_coords(node_coordinates, equations, dg, indices, element) u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) - end - - return nothing -end - -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{3}, - equations, dg::DG, cache) - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, k, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, k, element) - end + set_node_vars!(u, u_node, equations, dg, indices, element) end return nothing diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 4f302a196a9..dddbcb1a4e6 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -239,7 +239,7 @@ function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) end # for the stepsize callback -function max_dt(u, t, 
mesh::DGMultiMesh, +function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, @@ -268,7 +268,7 @@ function max_dt(u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend::Nothing, u, t, mesh::DGMultiMesh, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti{NDIMS}, @@ -297,7 +297,7 @@ function max_dt(u, t, mesh::DGMultiMesh, end # for the stepsize callback -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ -320,7 +320,7 @@ function max_dt(u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ -727,7 +727,7 @@ function calc_sources!(du, u, t, source_terms, return nothing end -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 56d4e1f3752..e78d0fbee98 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -631,7 +631,7 @@ end # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. 
-function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) @@ -676,7 +676,7 @@ end # integral, e.g., an entropy conservative/stable discretization. The implementation of `rhs!` # for such schemes is very similar to the implementation of `rhs!` for standard DG methods, # but specializes `calc_volume_integral`. -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 5185a7dbec5..4bd95de7e7a 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -598,7 +598,7 @@ end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). -function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 502e31c9f36..9a7865948dc 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -8,44 +8,44 @@ # The following `volume_integral_kernel!` and `calc_volume_integral!` functions are # dimension and meshtype agnostic, i.e., valid for all 1D, 2D, and 3D meshes. 
-@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg, cache, alpha = true) - weak_form_kernel!(du, u, element, mesh, + weak_form_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, dg, cache, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg, cache, alpha = true) @unpack volume_flux = volume_integral # Volume integral specific data - flux_differencing_kernel!(du, u, element, mesh, + flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_flux, dg, cache, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache, alpha = true) @unpack volume_flux_fv = volume_integral # Volume integral specific data - fv_kernel!(du, u, mesh, + fv_kernel!(du, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, cache, element, alpha) return nothing end -@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolumeO2, dg::DGSEM, cache, alpha = true) @@ -53,7 +53,7 @@ end @unpack (sc_interface_coords, volume_flux_fv, reconstruction_mode, slope_limiter, cons2recon, recon2cons) = volume_integral - fvO2_kernel!(du, u, mesh, + fvO2_kernel!(du, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -63,14 +63,14 @@ end return 
nothing end -@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralAdaptive{<:IndicatorEntropyChange}, dg::DGSEM, cache) @unpack volume_integral_default, volume_integral_stabilized, indicator = volume_integral @unpack maximum_entropy_increase = indicator - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -79,11 +79,11 @@ end # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. dS_default = -entropy_change_reference_element(du, u, element, - mesh, equations, dg, cache) + meshT, equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - mesh, equations, dg, cache) + meshT, equations, dg, cache) entropy_change = dS_default - dS_true if entropy_change > maximum_entropy_increase # Recompute using EC FD volume integral @@ -92,7 +92,7 @@ end # before any surface terms are added. 
du[.., element] .= zero(eltype(du)) - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) end @@ -100,7 +100,7 @@ end return nothing end -@inline function volume_integral_kernel!(du, u, element, mesh, +@inline function volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral::VolumeIntegralEntropyCorrection, dg::DGSEM, cache) @@ -110,7 +110,7 @@ end du_element_threaded = indicator.cache.volume_integral_values_threaded # run default volume integral - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -125,12 +125,12 @@ end # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. dS_volume_integral = -entropy_change_reference_element(du, u, element, - mesh, equations, + meshT, equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - mesh, equations, dg, cache) + meshT, equations, dg, cache) # This quantity should be ≤ 0 for an entropy stable volume integral, and # exactly zero for an entropy conservative volume integral. 
@@ -147,13 +147,13 @@ end du[.., element] .= zero(eltype(du)) # Calculate entropy stable volume integral contribution - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) dS_volume_integral_stabilized = -entropy_change_reference_element(du, u, element, - mesh, + meshT, equations, dg, cache) @@ -177,11 +177,11 @@ end return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral, dg::DGSEM, cache) @threaded for element in eachelement(dg, cache) - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral, dg, cache) end @@ -189,7 +189,27 @@ function calc_volume_integral!(du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Backend, du, u, mesh, + have_nonconservative_terms, equations, + volume_integral, dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + kernel! 
= volume_integral_KAkernel!(backend) + kernel_cache = kernel_filter_cache(cache) + kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, + volume_integral, dg, kernel_cache, + ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function volume_integral_KAkernel!(du, u, meshT, + have_nonconservative_terms, equations, + volume_integral, dg::DGSEM, cache) + element = @index(Global) + volume_integral_kernel!(du, u, element, meshT, have_nonconservative_terms, + equations, volume_integral, dg, cache) +end + +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHGType, dg::DGSEM, cache) @@ -210,18 +230,18 @@ function calc_volume_integral!(du, u, mesh, dg_only = isapprox(alpha_element, 0, atol = atol) if dg_only - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_default, dg, cache) else # Calculate DG volume integral contribution - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_blend_high_order, dg, cache, 1 - alpha_element) # Calculate FV volume integral contribution - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_blend_low_order, dg, cache, alpha_element) @@ -231,7 +251,7 @@ function calc_volume_integral!(du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralEntropyCorrectionShockCapturingCombined, dg::DGSEM, cache) @@ -250,7 +270,7 @@ function calc_volume_integral!(du, u, mesh, @threaded for element in eachelement(dg, cache) # run default volume integral - 
volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_default, dg, cache) @@ -265,12 +285,12 @@ function calc_volume_integral!(du, u, mesh, # No scaling by inverse Jacobian here, as there is no Jacobian multiplication # in `integrate_reference_element`. dS_volume_integral = -entropy_change_reference_element(du, u, element, - mesh, equations, + typeof(mesh), equations, dg, cache) # Compute true entropy change given by surface integral of the entropy potential dS_true = surface_integral_reference_element(entropy_potential, u, element, - mesh, equations, dg, cache) + typeof(mesh), equations, dg, cache) # This quantity should be ≤ 0 for an entropy stable volume integral, and # exactly zero for an entropy conservative volume integral. @@ -287,13 +307,13 @@ function calc_volume_integral!(du, u, mesh, du[.., element] .= zero(eltype(du)) # Calculate entropy stable volume integral contribution - volume_integral_kernel!(du, u, element, mesh, + volume_integral_kernel!(du, u, element, typeof(mesh), have_nonconservative_terms, equations, volume_integral_stabilized, dg, cache) dS_volume_integral_stabilized = -entropy_change_reference_element(du, u, element, - mesh, + typeof(mesh), equations, dg, cache) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 7ac6febf470..3f86fff2bb9 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -936,6 +936,24 @@ end end end +@inline function indices2direction2d(indices) + if indices[1] === :begin + return 1 + elseif indices[1] === :end + return 2 + elseif indices[2] === :begin + return 3 + else # if indices[2] === :end + return 4 + end +end + +# Build a reduced cache which can be passed to GPU kernels +@inline function kernel_filter_cache(cache) + return (; + elements = (; contravariant_vectors = cache.elements.contravariant_vectors)) +end + 
include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index d4cd609c6bd..ec48d5d204a 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -62,125 +62,218 @@ end end end -function prolong2interfaces!(cache, u, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, +function prolong2interfaces!(backend::Nothing, cache, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, equations, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] + prolong2interfaces_per_interface!(interfaces.u, u, interface, typeof(mesh), + equations, neighbor_ids, node_indices, + index_range) + end + return nothing +end + +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG) + @unpack interfaces = cache + ninterfaces(interfaces) == 0 && return nothing + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) + + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interfaces_u, u, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, + node_indices, index_range) + interface = @index(Global) + prolong2interfaces_per_interface!(interfaces_u, u, interface, mT, equations, + neighbor_ids, node_indices, index_range) +end + +@inline function prolong2interfaces_per_interface!(interfaces_u, u, interface, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, + node_indices, + index_range) + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] + end + i_primary += i_primary_step + j_primary += j_primary_step + end - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
+ secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, - primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. 
- secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] + return nothing +end - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], - index_range) +function calc_interface_flux!(backend::Nothing, surface_flux_values, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, - secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step - end + @threaded for interface in eachinterface(dg, cache) + calc_interface_flux_per_interface!(surface_flux_values, typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) end return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Backend, surface_flux_values, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) + ninterfaces(cache.interfaces) == 0 && return nothing @unpack neighbor_ids, node_indices = cache.interfaces @unpack contravariant_vectors = cache.elements index_range = eachnode(dg) + + kernel! 
= calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, + equations, surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange = ninterfaces(cache.interfaces)) + + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, + st::Type{<:DG}, u_interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_per_interface!(surface_flux_values, mt, + have_nonconservative_terms, equations, + surface_integral, st, u_interface, + interface, neighbor_ids, node_indices, + contravariant_vectors, index_range) +end + +@inline function calc_interface_flux_per_interface!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, + st::Type{<:DG}, + u_interface, interface, + neighbor_ids, + node_indices, contravariant_vectors, + index_range) index_end = last(index_range) - @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction2d(primary_indices) - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) + # 
Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. - if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end + i_primary = i_primary_start + j_primary = j_primary_start - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step - end + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction2d(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end + + for node in index_range + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
+ normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mt, have_nonconservative_terms, + equations, surface_integral, st, u_interface, interface, + normal_direction, node, primary_direction, + primary_element, node_secondary, + secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step end return nothing @@ -188,19 +281,22 @@ end # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, - T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, have_nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, + primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -215,20 +311,22 @@ end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + meshT::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, 
have_nonconservative_terms::True, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) @unpack surface_flux = surface_integral - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_flux, equations), equations, - surface_integral, dg, cache, + surface_integral, st, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, @@ -238,20 +336,19 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, - surface_integral, dg::DG, cache, + surface_integral, st::Type{<:DG}, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -265,32 +362,31 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - 
surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = (flux_[v] + - 0.5f0 * - noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + 0.5f0 * - noncons_secondary[v]) + noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + + 0.5f0 * + noncons_secondary[v])) end return nothing end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, - surface_integral, dg::DG, cache, + surface_integral, st::Type{<:DG}, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, primary_node_index, interface_index) flux_left, flux_right = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -756,7 +852,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -764,6 +860,51 @@ function calc_surface_integral!(du, u, @unpack inverse_weights = dg.basis @unpack surface_flux_values = cache.elements + @threaded for element in eachelement(dg, cache) + calc_surface_integral_per_element!(du, typeof(mesh), equations, + surface_integral, dg, inverse_weights[1], + 
surface_flux_values, element) + end +end + +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack inverse_weights = dg.basis + @unpack surface_flux_values = cache.elements + + kernel! = calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, inverse_weights[1], + surface_flux_values, ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function calc_surface_integral_KAkernel!(du, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, + surface_flux_values) + element = @index(Global) + calc_surface_integral_per_element!(du, mT, equations, surface_integral, + dg, factor, surface_flux_values, element) +end + +@inline function calc_surface_integral_per_element!(du, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, + surface_flux_values, + element) # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for Gauss-Lobatto DGSEM just M^{-1} * B) @@ -771,33 +912,32 @@ function calc_surface_integral!(du, u, # # We also use explicit assignments instead of `+=` to let `@muladd` turn these # into FMAs (see comment at the top of the file). 
- factor = inverse_weights[1] # For LGL basis: Identical to weighted boundary interpolation at x = ±1 - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = (du[v, 1, l, element] + - surface_flux_values[v, l, 1, element] * - factor) - - # surface at +x - du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor) - - # surface at -y - du[v, l, 1, element] = (du[v, l, 1, element] + - surface_flux_values[v, l, 3, element] * - factor) - - # surface at +y - du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor) - end + # + # factor = inverse_weights[1] + # For LGL basis: Identical to weighted boundary interpolation at x = ±1 + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor) end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 35372eecea7..d8565b6d22a 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -123,7 +123,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{P4estMesh{2}, P4estMesh{3}}, # Calculate surface integrals. # This reuses `calc_surface_integral!` for the purely hyperbolic case. 
@trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache) end @@ -158,7 +158,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces. # This reuses `prolong2interfaces` for the purely hyperbolic case. @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u_transformed, mesh, + prolong2interfaces!(nothing, cache, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl index f77d625f57e..2cefabd9539 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl @@ -140,7 +140,7 @@ end @inline function calc_mpi_interface_flux!(surface_flux_values, mesh::Union{P4estMeshParallel{2}, T8codeMeshParallel{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, interface_node_index, local_side, @@ -321,7 +321,7 @@ end @inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh::Union{P4estMeshParallel{2}, T8codeMeshParallel{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index b70661c56ba..07c6b1b6a0b 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -91,85 +91,117 @@ end return (i1, i2) end -function prolong2interfaces!(cache, u, +function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for 
interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, - i_primary, j_primary, - k_primary, - primary_element] - end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i + prolong2interfaces_interface!(interfaces.u, u, typeof(mesh), equations, + neighbor_ids, node_indices, index_range, + interface) + end + return nothing +end + +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG) + @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) + + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, + ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interface_u, u, meshT, equations, + neighbor_ids, node_indices, index_range) + interface = @index(Global) + prolong2interfaces_interface!(interface_u, u, meshT, equations, neighbor_ids, + node_indices, index_range, interface) +end + +@inline function prolong2interfaces_interface!(u_interface, u, + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, neighbor_ids, node_indices, + index_range, interface) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[1, v, i, j, interface] = u[v, + i_primary, j_primary, + k_primary, + primary_element] end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end 
- # Copy solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], - index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], - index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, - i_secondary, j_secondary, - k_secondary, - secondary_element] - end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. 
+ secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[2, v, i, j, interface] = u[v, + i_secondary, j_secondary, + k_secondary, + secondary_element] end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j end - return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) @@ -178,93 +210,140 @@ function calc_interface_flux!(surface_flux_values, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j 
= index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. - # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], - index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start + calc_interface_flux_interface!(surface_flux_values, + typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, neighbor_ids, node_indices, + contravariant_vectors, index_range, interface) + end + return nothing +end + +function calc_interface_flux!(backend::Backend, surface_flux_values, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + kernel! 
= calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations, + surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange = ninterfaces(cache.interfaces)) + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, meshT, + have_nonconservative_terms, equations, + surface_integral, solverT, u_interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_interface!(surface_flux_values, + meshT, + have_nonconservative_terms, + equations, surface_integral, solverT, u_interface, + neighbor_ids, node_indices, contravariant_vectors, + index_range, interface) +end + +@inline function calc_interface_flux_interface!(surface_flux_values, + meshT::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, + equations, surface_integral, + solverT::Type{<:DG}, u_interface, + neighbor_ids, + node_indices, contravariant_vectors, + index_range, interface) + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices 
= surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + index_range) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in index_range + for i in index_range + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, + equations, + surface_integral, solverT, u_interface, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, + secondary_element) - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, - have_nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, - secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j end - return nothing end # Inlined function for interface flux computation for conservative flux terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - 
@unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -281,19 +360,21 @@ end # Inlined function for interface flux computation for flux + nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + meshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), - equations, surface_integral, dg, cache, interface_index, + equations, surface_integral, solverT, u_interface, + interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, @@ -302,21 +383,21 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, 
primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -343,21 +424,22 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, - primary_j_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, + interface_index) flux_left, flux_right = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -463,7 +545,7 @@ end # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::False, equations, + have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache, i_index, j_index, k_index, i_node_index, j_node_index, direction_index, @@ -499,13 +581,13 @@ end # inlined version of the boundary flux calculation along 
a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, i_index, j_index, k_index, i_node_index, j_node_index, direction_index, element_index, boundary_index) calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh, - nonconservative_terms, + have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), equations, @@ -517,7 +599,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -560,7 +642,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -922,13 +1004,53 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) @unpack inverse_weights = dg.basis @unpack surface_flux_values = cache.elements + @threaded for element in eachelement(dg, cache) + calc_surface_integral_element!(du, typeof(mesh), + equations, surface_integral, + dg, inverse_weights[1], + surface_flux_values, + element) + end + return nothing +end + +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) 
+ @unpack inverse_weights = dg.basis + @unpack surface_flux_values = cache.elements + + kernel! = calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, inverse_weights[1], + surface_flux_values, ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function calc_surface_integral_KAkernel!(du, meshT, equations, + surface_integral, dg, factor, + surface_flux_values) + element = @index(Global) + calc_surface_integral_element!(du, meshT, + equations, surface_integral, dg, factor, + surface_flux_values, element) +end + +@inline function calc_surface_integral_element!(du, + ::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, factor, surface_flux_values, + element) # Note that all fluxes have been computed with outward-pointing normal vectors. # This computes the **negative** surface integral contribution, # i.e., M^{-1} * boundary_interpolation^T (which is for Gauss-Lobatto DGSEM just M^{-1} * B) @@ -936,49 +1058,48 @@ function calc_surface_integral!(du, u, # # We also use explicit assignments instead of `+=` to let `@muladd` turn these # into FMAs (see comment at the top of the file). 
- factor = inverse_weights[1] # For LGL basis: Identical to weighted boundary interpolation at x = ±1 - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = (du[v, 1, l, m, element] + - surface_flux_values[v, l, m, 1, - element] * - factor) - - # surface at +x - du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + - surface_flux_values[v, l, m, 2, - element] * - factor) - - # surface at -y - du[v, l, 1, m, element] = (du[v, l, 1, m, element] + - surface_flux_values[v, l, m, 3, - element] * - factor) - - # surface at +y - du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + - surface_flux_values[v, l, m, 4, - element] * - factor) - - # surface at -z - du[v, l, m, 1, element] = (du[v, l, m, 1, element] + - surface_flux_values[v, l, m, 5, - element] * - factor) - - # surface at +z - du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + - surface_flux_values[v, l, m, 6, - element] * - factor) - end + # + # factor = inverse_weights[1] + # For LGL basis: Identical to weighted boundary interpolation at x = ±1 + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, + element] * + factor) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, + element] * + factor) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, + element] * + factor) + + # surface at +z + du[v, l, m, 
nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor) end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index bb5ee8e9196..d062d1fdee4 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{P4estMeshParallel{3}, T8codeMeshParallel{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -33,19 +33,19 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -95,11 +95,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, + cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit 
timer() "source terms" begin diff --git a/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl index ed57b91ce1c..a8036aaf22e 100644 --- a/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl +++ b/src/solvers/dgsem_p4est/dg_3d_subcell_limiters.jl @@ -60,7 +60,7 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - mesh::P4estMesh{3}, + meshT::Type{<:P4estMesh{3}}, nonconservative_terms, equations, volume_integral::VolumeIntegralSubcellLimiting, dg::DGSEM, cache) @@ -77,7 +77,7 @@ end fhat3_L = fhat3_L_threaded[Threads.threadid()] fhat3_R = fhat3_R_threaded[Threads.threadid()] calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh, nonconservative_terms, equations, volume_flux_dg, + u, meshT, nonconservative_terms, equations, volume_flux_dg, dg, element, cache) # low-order FV fluxes @@ -90,13 +90,13 @@ end fstar3_L = fstar3_L_threaded[Threads.threadid()] fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, volume_flux_fv, + u, meshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh, nonconservative_terms, equations, limiter, + u, meshT, nonconservative_terms, equations, limiter, dg, element, cache) # Calculate volume integral contribution of low-order FV flux @@ -119,7 +119,7 @@ end # # See also `flux_differencing_kernel!`. 
@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) (; contravariant_vectors) = cache.elements @@ -263,7 +263,7 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. # @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -550,7 +550,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::False, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes @@ -601,7 +601,7 @@ end fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, - u, mesh::P4estMesh{3}, + u, ::Type{<:P4estMesh{3}}, nonconservative_terms::True, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index cc0243a9989..8a71c7ad884 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -38,7 +38,7 @@ function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionP return nothing end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{StructuredMesh, StructuredMeshView{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -47,7 +47,7 @@ function rhs!(du, u, t, # Calculate volume 
integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -67,12 +67,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index 672dbe65ebf..57327fa679d 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -69,7 +69,7 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, mesh::StructuredMesh{1}, +function apply_jacobian!(backend::Nothing, du, mesh::StructuredMesh{1}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index b92944fb338..b7cb31c7b0f 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -29,9 +29,10 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 =# @inline function weak_form_kernel!(du, u, element, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, dg::DGSEM, 
cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -70,10 +71,11 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) @unpack derivative_split = dg.basis @@ -133,13 +135,14 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + meshT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, @@ -149,10 +152,11 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + meshT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, @@ -162,7 +166,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, 
alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -222,10 +226,11 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{2}, - StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, @@ -293,9 +298,9 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack normal_vectors_1, normal_vectors_2 = cache.normal_vectors @@ -335,9 +340,9 @@ end end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -416,9 +421,9 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, - P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack normal_vectors_1, normal_vectors_2 = cache.normal_vectors @@ -711,26 +716,61 @@ function calc_boundary_flux!(cache, u, t, 
boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Negative sign included to account for the negated surface and volume terms, - # see e.g. the computation of `derivative_hat` in the basis setup and - # the comment in `calc_surface_integral!`. - factor = -inverse_jacobian[i, j, element] - - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end - end + apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) end +end +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG, cache) + nelements(dg, cache) == 0 && return nothing + @unpack inverse_jacobian = cache.elements + kernel! = apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange = nelements(dg, cache)) +end + +@kernel function apply_jacobian_KAkernel!(du, + mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian) + element = @index(Global) + apply_jacobian_per_element!(du, mT, equations, dg, inverse_jacobian, element) +end + +@inline function apply_jacobian_per_element!(du, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian, + element) + for j in eachnode(dg), i in eachnode(dg) + # Negative sign included to account for the negated surface and volume terms, + # see e.g. 
the computation of `derivative_hat` in the basis setup and + # the comment in `calc_surface_integral!`. + factor = -inverse_jacobian[i, j, element] + + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end return nothing end end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index c2956d027b8..508d3c92d82 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -19,26 +19,27 @@ # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -82,7 +83,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -155,7 +156,7 @@ contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] @@ -226,20 +227,21 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -248,7 +250,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) @@ -294,7 +296,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for j in eachnode(dg), i in eachnode(dg) contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] @@ -400,7 +402,7 @@ end contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] diff --git a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl index f29de2d912d..b0c65960f6c 100644 --- a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl @@ -10,7 +10,7 @@ # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) (; contravariant_vectors) = cache.elements @@ -111,8 +111,8 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. 
# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, - nonconservative_terms::True, equations, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, + have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, cache) where { @@ -315,8 +315,8 @@ end # The calculation of the non-conservative staggered "fluxes" requires non-conservative # terms that can be written as a product of local and jump contributions. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::Union{StructuredMesh{2}, P4estMesh{2}}, - nonconservative_terms::True, equations, + ::Type{<:Union{StructuredMesh{2}, P4estMesh{2}}}, + have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, cache) where { diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 7b0414f66fb..146aba255d1 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -4,7 +4,6 @@ # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin #! 
format: noindent - function create_cache(mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, @@ -32,8 +31,8 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 =# @inline function weak_form_kernel!(du, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -89,8 +88,9 @@ end # mapping terms, stored in `contravariant_vectors`, is peeled apart from the evaluation of # the physical fluxes in each Cartesian direction @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -171,11 +171,12 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + flux_differencing_kernel!(du, u, element, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(volume_flux, equations), equations, volume_flux, dg, cache, alpha) @@ -184,8 +185,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, 
equations, @@ -195,7 +197,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -274,8 +276,9 @@ end end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, @@ -369,8 +372,8 @@ end # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -430,8 +433,8 @@ end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -526,8 +529,8 @@ end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -896,24 +899,48 @@ function 
calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - # Negative sign included to account for the negated surface and volume terms, - # see e.g. the computation of `derivative_hat` in the basis setup and - # the comment in `calc_surface_integral!`. - factor = -inverse_jacobian[i, j, k, element] - - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end - end + apply_jacobian_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) end + return nothing +end +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + + kernel! = apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function apply_jacobian_KAkernel!(du, meshT, equations, dg::DG, + inverse_jacobian) + element = @index(Global) + apply_jacobian_element!(du, meshT, equations, dg, inverse_jacobian, element) +end + +@inline function apply_jacobian_element!(du, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, + equations, dg, inverse_jacobian, element) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + # Negative sign included to account for the negated surface and volume terms, + # see e.g. the computation of `derivative_hat` in the basis setup and + # the comment in `calc_surface_integral!`. 
+ factor = -inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end return nothing end end # @muladd diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 8b710417ff7..9143286b88e 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -19,25 +19,26 @@ # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -88,7 +89,7 @@ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -176,7 +177,7 @@ (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -264,7 +265,7 @@ contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) @@ -351,19 +352,20 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis @unpack contravariant_vectors = cache.elements + ndims = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
In addition @@ -372,7 +374,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -425,7 +427,7 @@ end contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) jk = j + nnodes(dg) * (k - 1) @@ -546,7 +548,7 @@ end (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] @@ -667,7 +669,7 @@ end contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + StaticInt(ndims))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) ij = i + nnodes(dg) * (j - 1) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index c472e071ab3..74934a1b4b4 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -55,7 +55,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
-function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::TreeMesh{1}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -64,7 +64,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -94,12 +94,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -117,7 +118,8 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -138,7 +140,8 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 end @inline function flux_differencing_kernel!(du, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -167,7 +170,8 @@ end end @inline function 
flux_differencing_kernel!(du, u, element, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + meshT::Type{<:Union{TreeMesh{1}, + StructuredMesh{1}}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -176,7 +180,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -202,7 +206,7 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + meshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded = cache @@ -211,7 +215,7 @@ end # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) @@ -228,7 +232,7 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + meshT::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -240,7 +244,7 @@ end # Calculate FV two-point fluxes fstar1_L = fstar1_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, + calcflux_fvO2!(fstar1_L, fstar1_R, u, meshT, nonconservative_terms, equations, volume_flux_fv, dg, element, cache, 
sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) @@ -262,7 +266,7 @@ end # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for i in 2:nnodes(dg) @@ -277,7 +281,7 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, u, - mesh::TreeMesh{1}, + ::Type{<:TreeMesh{1}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) volume_flux, nonconservative_flux = volume_flux_fv @@ -308,7 +312,7 @@ end # "An entropy stable nodal discontinuous Galerkin method for the resistive MHD equations. Part II: Subcell finite volume shock capturing" # [JCP: 2021.110580](https://doi.org/10.1016/j.jcp.2021.110580) @inline function calcflux_fvO2!(fstar1_L, fstar1_R, u, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + ::Type{<:Union{TreeMesh{1}, StructuredMesh{1}}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -616,7 +620,8 @@ function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:A return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) @unpack inverse_weights = dg.basis @@ -646,7 +651,8 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1 return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function 
calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DG{<:GaussLegendreBasis}, cache) @unpack boundary_interpolation_inverse_weights = dg.basis @@ -677,7 +683,7 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1 return nothing end -function apply_jacobian!(du, mesh::TreeMesh{1}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{1}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index ac06ec4dfac..a6bfa09fdaa 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -107,7 +107,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, # Calculate surface integrals. # This reuses `calc_surface_integral!` for the purely hyperbolic case. @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache) end diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index c1a845ab06d..7b5bc8e7328 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -100,7 +100,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
-function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, @@ -111,19 +111,19 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -154,12 +154,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -177,7 +178,7 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ 
-204,7 +205,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, +@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -242,7 +243,7 @@ end end end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, +@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -251,7 +252,7 @@ end symmetric_flux, nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -285,9 +286,9 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -301,7 +302,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, 
sc_interface_coords, reconstruction_mode, slope_limiter, @@ -322,7 +323,7 @@ end end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -389,9 +390,9 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2}, - T8codeMesh{2}}, + meshT::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache @@ -402,7 +403,7 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) @@ -425,7 +426,7 @@ end # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for j in eachnode(dg), i in 2:nnodes(dg) @@ -448,7 +449,7 @@ end end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::TreeMesh{2}, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) volume_flux, nonconservative_flux = volume_flux_fv @@ -496,7 +497,8 @@ end return nothing end -function 
prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{2}, equations, + dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u @@ -523,7 +525,7 @@ function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, dg::DG) return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{2}, equations, dg::DG{<:GaussLegendreBasis}) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces @@ -564,7 +566,7 @@ function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -598,7 +600,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) @@ -1120,7 +1122,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -1164,7 +1166,7 @@ function calc_surface_integral!(du, u, return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -1215,7 +1217,7 @@ function calc_surface_integral!(du, u, return nothing end -function apply_jacobian!(du, mesh::TreeMesh{2}, +function 
apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{2}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index 51a5897b065..efcb7cc6794 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -65,24 +65,25 @@ end # muladd # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations)))) @turbo for j in eachnode(dg), i in eachnode(dg) @@ -227,18 +228,19 @@ end # muladd end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{2}, + element, ::Type{<:TreeMesh{2}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 2 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -247,7 +249,7 @@ end # values. u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index d7e452a97db..05bd3d95fc2 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -128,7 +128,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, TreeMesh{3}}, # Calculate surface integrals. # This reuses `calc_surface_integral!` for the purely hyperbolic case. 
@trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache) end @@ -952,7 +952,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces # This reuses `prolong2interfaces!` for the purely hyperbolic case. @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u_transformed, mesh, + prolong2interfaces!(nothing, cache, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index 37ba51e15bc..8a9dc3d58a1 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -450,7 +450,7 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{TreeMeshParallel{2}, P4estMeshParallel{2}, T8codeMeshParallel{2}}, equations, boundary_conditions, source_terms::Source, @@ -479,7 +479,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -487,12 +487,12 @@ function rhs!(du, u, t, # Prolong solution to interfaces # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
@trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -542,12 +542,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index bccb96a1404..b9d46ddf164 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -58,8 +58,9 @@ end # Subcell limiting currently only implemented for certain mesh types @inline function volume_integral_kernel!(du, u, element, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + meshT::Type{<:Union{TreeMesh{2}, + StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralSubcellLimiting, dg::DGSEM, cache) @@ -73,7 +74,7 @@ end fhat1_R = fhat1_R_threaded[Threads.threadid()] fhat2_L = fhat2_L_threaded[Threads.threadid()] fhat2_R = fhat2_R_threaded[Threads.threadid()] - calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, mesh, + calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, meshT, have_nonconservative_terms, 
equations, volume_flux_dg, dg, element, cache) @@ -84,14 +85,15 @@ end fstar2_L = fstar2_L_threaded[Threads.threadid()] fstar1_R = fstar1_R_threaded[Threads.threadid()] fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # antidiffusive flux calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, - u, mesh, have_nonconservative_terms, equations, limiter, dg, + u, meshT, have_nonconservative_terms, equations, limiter, + dg, element, cache) # Calculate volume integral contribution of low-order FV flux @@ -112,7 +114,8 @@ end # # See also `flux_differencing_kernel!`. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{2}}, + have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, element, cache) @unpack weights, derivative_split = dg.basis @@ -191,7 +194,7 @@ end # Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf. # @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, have_nonconservative_terms::True, + ::Type{<:TreeMesh{2}}, have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -370,7 +373,7 @@ end # The calculation of the non-conservative staggered "fluxes" requires non-conservative # terms that can be written as a product of local and jump contributions. 
@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, - mesh::TreeMesh{2}, nonconservative_terms::True, + ::Type{<:TreeMesh{2}}, nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, @@ -608,8 +611,8 @@ end # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems. @inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms::False, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes @@ -645,8 +648,8 @@ end # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems. 
@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, - P4estMesh{2}}, + ::Type{<:Union{TreeMesh{2}, StructuredMesh{2}, + P4estMesh{2}}}, have_nonconservative_terms::True, equations, limiter::SubcellLimiterIDP, dg, element, cache) @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 41b46f58f95..4aabd232d86 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -126,7 +126,7 @@ This treatment is required to achieve, e.g., entropy-stability or well-balancedn See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# @inline function weak_form_kernel!(du, u, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -158,7 +158,7 @@ See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-17 return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, +@inline function flux_differencing_kernel!(du, u, element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -208,7 +208,7 @@ end return nothing end -@inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, +@inline function flux_differencing_kernel!(du, u, element, meshT::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -217,7 +217,7 @@ end symmetric_flux, 
nonconservative_flux = volume_flux # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + flux_differencing_kernel!(du, u, element, meshT, False(), equations, symmetric_flux, dg, cache, alpha) # Calculate the remaining volume terms using the nonsymmetric generalized flux @@ -261,8 +261,9 @@ end end @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, alpha = true) @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache @@ -277,7 +278,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, have_nonconservative_terms, equations, + meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache) # Calculate FV volume integral contribution @@ -300,8 +301,9 @@ end end @inline function fvO2_kernel!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, + meshT::Type{<:Union{TreeMesh{3}, StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, have_nonconservative_terms, equations, volume_flux_fv, dg::DGSEM, cache, element, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -321,7 +323,7 @@ end fstar3_R = fstar3_R_threaded[Threads.threadid()] calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, have_nonconservative_terms, equations, + meshT, have_nonconservative_terms, equations, volume_flux_fv, dg, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, cons2recon, recon2cons) @@ -351,7 +353,7 @@ end # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, 
fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) @@ -383,7 +385,7 @@ end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, + ::Type{<:TreeMesh{3}}, have_nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) volume_flux, nonconservative_flux = volume_flux_fv @@ -447,7 +449,8 @@ end @inline function calcflux_fvO2!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, have_nonconservative_terms::False, + ::Type{<:TreeMesh{3}}, + have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache, sc_interface_coords, reconstruction_mode, slope_limiter, @@ -519,7 +522,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{3}, equations, + dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u @@ -557,7 +561,7 @@ function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, dg::DG) return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -592,7 +596,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) @@ -1333,7 +1337,8 @@ end return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, 
StructuredMesh{3}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) @unpack inverse_weights = dg.basis @@ -1391,7 +1396,7 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3 return nothing end -function apply_jacobian!(du, mesh::TreeMesh{3}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{3}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index b2c48c9f00a..f1d2573dc79 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -17,24 +17,25 @@ # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) @@ -263,18 +264,19 @@ end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::TreeMesh{3}, + element, ::Type{<:TreeMesh{3}}, have_nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) @unpack derivative_split = dg.basis + ndims_mesh = 3 # Create a temporary array that will be used to store the RHS with permuted # indices `[i, j, k, v]` to allow using SIMD instructions. # `StrideArray`s with purely static dimensions do not allocate on the heap. du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + (ntuple(_ -> StaticInt(nnodes(dg)), ndims_mesh)..., StaticInt(nvariables(equations)))) # Convert conserved to primitive variables on the given `element`. In addition @@ -283,7 +285,7 @@ end # values. 
u_prim = StrideArray{eltype(u_cons)}(undef, (ntuple(_ -> StaticInt(nnodes(dg)), - ndims(mesh))..., + ndims_mesh)..., StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 76a186b9fa6..74053154497 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -35,7 +35,7 @@ function create_cache(mesh::UnstructuredMesh2D, equations, return cache end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::UnstructuredMesh2D, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -44,7 +44,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -80,7 +80,8 @@ function rhs!(du, u, t, # Apply Jacobian from mapping to reference element # Note! 
this routine is reused from dgsem_structured/dg_2d.jl - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 004e6b95c98..002093121a7 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -42,7 +42,8 @@ function create_cache(mesh::TreeMesh{1}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, mesh::TreeMesh{1}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) @@ -88,7 +89,8 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, mesh::TreeMesh{1}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) @@ -139,7 +141,7 @@ function calc_volume_integral!(du, u, mesh::TreeMesh{1}, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -166,7 +168,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -220,7 +222,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -248,7 +250,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index d370d29aff8..5d30157a28c 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -42,7 +42,8 @@ function create_cache(mesh::Union{TreeMesh{2}, UnstructuredMesh2D}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, mesh::TreeMesh{2}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) @@ -97,7 +98,8 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, mesh::TreeMesh{2}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) @@ -159,7 +161,7 @@ function calc_volume_integral!(du, u, mesh::TreeMesh{2}, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -202,7 +204,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -214,7 +216,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, @@ -260,7 +262,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -304,7 +306,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index 719ef4f7c96..e737a8f2137 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -42,7 +42,8 @@ function create_cache(mesh::TreeMesh{3}, equations, end # 3D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, mesh::TreeMesh{3}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) @@ -104,7 +105,8 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, mesh::TreeMesh{3}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) @@ -181,7 +183,7 @@ function calc_volume_integral!(du, u, mesh::TreeMesh{3}, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -238,7 +240,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -250,7 +252,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, @@ -297,7 +299,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -355,7 +357,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl index 2a10d0a6cea..a9239ada1fa 100644 --- a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl +++ b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl @@ -29,7 +29,8 @@ end # 2D volume integral contributions for `VolumeIntegralStrongForm` # OBS! This is the standard (not de-aliased) form of the volume integral. # So it is not provably stable for variable coefficients due to the the metric terms. -function calc_volume_integral!(du, u, mesh::UnstructuredMesh2D, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) @@ -91,7 +92,8 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. 
-function calc_volume_integral!(du, u, mesh::UnstructuredMesh2D, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl index 5de2164fa3e..414a016bd0d 100644 --- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl +++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl @@ -58,8 +58,9 @@ function calculate_cfl(ode_algorithm::AbstractPairedExplicitRK, ode) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - cfl_number = dt_opt / max_dt(u, t0, mesh, + cfl_number = dt_opt / max_dt(backend, u, t0, mesh, have_constant_speed(equations), equations, solver, cache) return cfl_number diff --git a/test/Project.toml b/test/Project.toml index 48e1efa86ee..69ba19cdad1 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -17,6 +17,7 @@ Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" OrdinaryDiffEqBDF = "6ad6398a-0878-4a85-9266-38940aa047c8" OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8" @@ -60,6 +61,7 @@ Krylov = "0.10" LinearAlgebra = "1" LinearSolve = "3.13" MPI = "0.20.22" +Metal = "1.9.2" NLsolve = "4.5.1" OrdinaryDiffEqBDF = "1.1" OrdinaryDiffEqCore = "1.26, 2, 3" diff --git a/test/runtests.jl b/test/runtests.jl index 8b524a09df8..f4675429064 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using Test using MPI: mpiexec +import 
Trixi # We run tests in parallel with CI jobs setting the `TRIXI_TEST` environment # variable to determine the subset of tests to execute. @@ -114,9 +115,30 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" import CUDA if CUDA.functional() - include("test_cuda.jl") + include("test_cuda_2d.jl") + include("test_cuda_3d.jl") else @warn "Unable to run CUDA tests on this machine" end end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "Metal" + import Metal + if Metal.functional() + include("test_metal_2d.jl") + else + @warn "Unable to run Metal tests on this machine" + end + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "kernelabstractions" + previous_backend = Trixi._PREFERENCE_THREADING + Trixi.set_threading_backend!(:kernelabstractions) + # relaunching julia + try + run(`$(Base.julia_cmd()) --threads=$TRIXI_NTHREADS --check-bounds=yes $(abspath("test_kernelabstractions.jl"))`) + finally + Trixi.set_threading_backend!(Symbol(previous_backend)) + end + end end diff --git a/test/test_cuda.jl b/test/test_cuda_2d.jl similarity index 85% rename from test/test_cuda.jl rename to test/test_cuda_2d.jl index fd0adaeece1..f4b1fa1396e 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda_2d.jl @@ -1,15 +1,18 @@ -module TestCUDA +module TestCUDA2D using Test using Trixi include("test_trixi.jl") +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + # Start with a clean environment: remove Trixi.jl output directory if it exists outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") +@testset "CUDA 2D" begin +#! format: noindent @trixi_testset "elixir_advection_basic_gpu.jl native" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), @@ -39,14 +42,13 @@ end using CUDA @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! 
- l2=nothing, # TODO: GPU. [Float32(8.311947673061856e-6)], - linf=nothing, # TODO: GPU. [Float32(6.627000273229378e-5)], + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], RealT_for_test_tolerances=Float32, real_type=Float32, - storage_type=CuArray, - sol=nothing,) # TODO: GPU. Remove this once we can run the simulation on the GPU - # # Ensure that we do not have excessive memory allocations - # # (e.g., from type instabilities) + storage_type=CuArray) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) # @test_allocations(Trixi.rhs!, semi, sol, 1000) @test real(ode.p.solver) == Float32 @test real(ode.p.solver.basis) == Float32 @@ -65,5 +67,5 @@ end # Clean up afterwards: delete Trixi.jl output directory @test_nowarn isdir(outdir) && rm(outdir, recursive = true) - +end end # module diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl new file mode 100644 index 00000000000..5c6d5a52709 --- /dev/null +++ b/test/test_cuda_3d.jl @@ -0,0 +1,71 @@ +module TestCUDA3D + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_3d_dgsem") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +@testset "CUDA 3D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=[0.00016263963870641478], + linf=[0.0014537194925779984]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / CUDA" begin + # Using CUDA inside the testset since otherwise the bindings are hidden by the anonymous modules + using CUDA + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(0.00016263963870641478)], + linf=[Float32(0.0014537194925779984)], + RealT_for_test_tolerances=Float32, + real_type=Float32, + storage_type=CuArray) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa CuArray + @test ode.p.solver.basis.derivative_matrix isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortars) === CuArray +end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn
isdir(outdir) && rm(outdir, recursive = true) +end +end # module diff --git a/test/test_kernelabstractions.jl b/test/test_kernelabstractions.jl new file mode 100644 index 00000000000..dff169be3bd --- /dev/null +++ b/test/test_kernelabstractions.jl @@ -0,0 +1,79 @@ +module TestExamplesKernelAbstractions + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = examples_dir() + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive = true) +Trixi.MPI.Barrier(Trixi.mpi_comm()) + +@testset "basic" begin + @test Trixi._PREFERENCE_THREADING == :kernelabstractions +end + +@testset "KernelAbstractions CPU 2D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", + "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=8.311947673061856e-6, + linf=6.627000273229378e-5) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_2d_dgsem", + "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], + RealT_for_test_tolerances=Float32, + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) +end +end + +@testset "KernelAbstractions CPU 3D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_3d_dgsem", + "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=[0.00016263963870641478], + linf=[0.0014537194925779984]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "p4est_3d_dgsem", + "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(0.00016263963870641478)], + linf=[Float32(0.0014537194925779984)], + RealT_for_test_tolerances=Float32, + real_type=Float32) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) +end +end + +# Clean up afterwards: delete Trixi.jl output directory +Trixi.mpi_isroot() && isdir(outdir) && @test_nowarn rm(outdir, recursive = true) +Trixi.MPI.Barrier(Trixi.mpi_comm()) + +end # module diff --git a/test/test_metal_2d.jl b/test/test_metal_2d.jl new file mode 100644 index 00000000000..7d445712119 --- /dev/null +++ b/test/test_metal_2d.jl @@ -0,0 +1,71 @@ +module TestMetal2D + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +@testset "Metal 2D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=8.311947673061856e-6, + linf=6.627000273229378e-5) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: `mesh` is currently not `adapt`ed correctly + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / Metal" begin + # Using Metal inside the testset since otherwise the bindings are hidden by the anonymous modules + using Metal + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh!
+ l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], + RealT_for_test_tolerances=Float32, + real_type=Float32, + storage_type=MtlArray) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + # @test_allocations(Trixi.rhs!, semi, sol, 1000) + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: `mesh` is currently not `adapt`ed correctly + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa MtlArray + @test ode.p.solver.basis.derivative_matrix isa MtlArray + + @test Trixi.storage_type(ode.p.cache.elements) === MtlArray + @test Trixi.storage_type(ode.p.cache.interfaces) === MtlArray + @test Trixi.storage_type(ode.p.cache.boundaries) === MtlArray + @test Trixi.storage_type(ode.p.cache.mortars) === MtlArray +end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) +end +end # module diff --git a/test/test_performance_specializations_2d.jl b/test/test_performance_specializations_2d.jl index b42a1b8f640..7dceea2b6a7 100644 --- a/test/test_performance_specializations_2d.jl +++ b/test/test_performance_specializations_2d.jl @@ -33,7 +33,7 @@ isdir(outdir) && rm(outdir, recursive = true) # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -43,10 +43,10 @@ isdir(outdir) && rm(outdir, recursive = true) # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, 
u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] @@ -72,7 +72,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -82,10 +82,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] @@ -112,7 +112,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -122,10 +122,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] @@ -151,7 +151,7 @@ end # Call the optimized default version du .= 0 - 
Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -161,10 +161,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, 1] diff --git a/test/test_performance_specializations_3d.jl b/test/test_performance_specializations_3d.jl index 3b3bd40b2f5..967b0f9cf3e 100644 --- a/test/test_performance_specializations_3d.jl +++ b/test/test_performance_specializations_3d.jl @@ -33,7 +33,7 @@ isdir(outdir) && rm(outdir, recursive = true) # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -43,10 +43,10 @@ isdir(outdir) && rm(outdir, recursive = true) # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -72,7 +72,7 @@ end 
# Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -82,10 +82,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -112,7 +112,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) @@ -122,10 +122,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1] @@ -151,7 +151,7 @@ end # Call the optimized default version du .= 0 - Trixi.flux_differencing_kernel!(du, u, 1, semi.mesh, + Trixi.flux_differencing_kernel!(du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, 
semi.cache, true) @@ -161,10 +161,10 @@ end # `semi.solver.volume_integral.volume_flux` du .= 0 invoke(Trixi.flux_differencing_kernel!, - Tuple{typeof(du), typeof(u), Integer, typeof(semi.mesh), + Tuple{typeof(du), typeof(u), Integer, Type{typeof(semi.mesh)}, typeof(have_nonconservative_terms), typeof(semi.equations), Function, typeof(semi.solver), typeof(semi.cache), Bool}, - du, u, 1, semi.mesh, + du, u, 1, typeof(semi.mesh), have_nonconservative_terms, semi.equations, semi.solver.volume_integral.volume_flux, semi.solver, semi.cache, true) du_baseline = du[:, :, :, :, 1]