Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "0.16.2-DEV"
authors = ["Michael Schlottke-Lakemper <michael.schlottke-lakemper@uni-a.de>", "Gregor Gassner <ggassner@uni-koeln.de>", "Hendrik Ranocha <mail@ranocha.de>", "Andrew R. Winters <andrew.ross.winters@liu.se>", "Jesse Chan <jesse.chan@rice.edu>", "Andrés Rueda-Ramírez <am.rueda@upm.es>"]

[deps]
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
Expand Down Expand Up @@ -72,6 +73,7 @@ TrixiPlotsExt = "Plots"
TrixiSparseConnectivityTracerExt = "SparseConnectivityTracer"

[compat]
AcceleratedKernels = "0.4.3"
Accessors = "0.1.42"
AMDGPU = "2.2.1"
Adapt = "4.4"
Expand Down
4 changes: 2 additions & 2 deletions src/Trixi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect
using FillArrays: Ones, Zeros
using ForwardDiff: ForwardDiff
using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace
using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend,
allocate
using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend
using AcceleratedKernels: AcceleratedKernels
using LinearMaps: LinearMap
if _PREFERENCE_LOOPVECTORIZATION
using LoopVectorization: LoopVectorization, @turbo, indices
Expand Down
100 changes: 69 additions & 31 deletions src/callbacks_step/analysis_dg2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ function calc_error_norms(func, u, t, analyzer,
return l2_error, linf_error
end

function calc_error_norms(func, _u, t, analyzer,
function calc_error_norms(func, u, t, analyzer,
mesh::Union{StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D,
P4estMesh{2}, P4estMeshView{2},
Expand All @@ -147,16 +147,13 @@ function calc_error_norms(func, _u, t, analyzer,
initial_condition, dg::DGSEM, cache, cache_analysis)
@unpack vandermonde, weights = analyzer
@unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis
@unpack node_coordinates, inverse_jacobian = cache.elements

# TODO GPU AnalysisCallback currently lives on CPU
backend = trixi_backend(_u)
if backend isa Nothing # TODO GPU KA CPU backend
@unpack node_coordinates, inverse_jacobian = cache.elements
u = _u
else
node_coordinates = Array(cache.elements.node_coordinates)
inverse_jacobian = Array(cache.elements.inverse_jacobian)
u = Array(_u)
# Calculate error norms on the CPU, to ensure the order of summation is the same.
if trixi_backend(u) !== nothing
node_coordinates = Array(node_coordinates)
inverse_jacobian = Array(inverse_jacobian)
u = Array(u)
end

# Set up data structures
Expand Down Expand Up @@ -338,24 +335,26 @@ function integrate_via_indices(func::Func, u,
return integral
end

function integrate_via_indices(func::Func, _u,
function integrate_via_indices(func::Func, u,
mesh::Union{StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D,
P4estMesh{2}, P4estMeshView{2},
T8codeMesh{2}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
# TODO GPU AnalysiCallback currently lives on CPU
backend = trixi_backend(_u)
if backend isa Nothing # TODO GPU KA CPU backend
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements
u = _u
else
weights = Array(dg.basis.weights)
inverse_jacobian = Array(cache.elements.inverse_jacobian)
u = Array(_u)
end
return integrate_via_indices(func, trixi_backend(u), u, mesh, equations, dg, cache,
args...; normalize = normalize)
end

function integrate_via_indices(func::Func, ::Nothing, u,
mesh::Union{StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D,
P4estMesh{2}, P4estMeshView{2},
T8codeMesh{2}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements

# Initialize integral with zeros of the right shape
integral = zero(func(u, 1, 1, 1, equations, dg, args...))
Expand All @@ -380,6 +379,53 @@ function integrate_via_indices(func::Func, _u,
return integral
end

function integrate_via_indices(func::Func, backend::Backend, u,
mesh::Union{StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D,
P4estMesh{2}, P4estMeshView{2},
T8codeMesh{2}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements

function local_plus((integral, total_volume), (integral_element, volume_element))
return (integral + integral_element, total_volume + volume_element)
end

# `func(u, 1,1,1, equations, dg, args...)` might access GPU memory
# so we have to rely on the compiler to correctly infer the type of the integral here.
# TODO: Technically we need device_promote_op here that "infers" the function within the context of the GPU.
integral0 = zero(Base.promote_op(func, typeof(u), Int, Int, Int, typeof(equations),
typeof(dg), map(typeof, args)...))
init = neutral = (integral0, zero(real(mesh)))

# Use quadrature to numerically integrate over entire domain
num_elements = nelements(dg, cache)
integral, total_volume = AcceleratedKernels.mapreduce(local_plus, 1:num_elements,
backend; init,
neutral) do element
# Initialize integral with zeros of the right shapeu
local_integral, local_total_volume = neutral

for j in eachnode(dg), i in eachnode(dg)
volume_jacobian = abs(inv(inverse_jacobian[i, j, element]))
local_integral += volume_jacobian * weights[i] * weights[j] *
func(u, i, j, element, equations, dg, args...)
local_total_volume += volume_jacobian * weights[i] * weights[j]
end

return (local_integral, local_total_volume)
end

# Normalize with total volume
if normalize
integral = integral / total_volume
end

return integral
end

function integrate(func::Func, u,
mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2},
Expand Down Expand Up @@ -410,18 +456,10 @@ function integrate(func::Func, u,
end
end

function analyze(::typeof(entropy_timederivative), _du, u, t,
function analyze(::typeof(entropy_timederivative), du, u, t,
mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2},
UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}},
equations, dg::Union{DGSEM, FDSBP}, cache)
# TODO GPU AnalysiCallback currently lives on CPU
backend = trixi_backend(u)
if backend isa Nothing # TODO GPU KA CPU backend
du = _du
else
du = Array(_du)
end

# Calculate
# Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ
integrate_via_indices(u, mesh, equations, dg, cache,
Expand Down
96 changes: 65 additions & 31 deletions src/callbacks_step/analysis_dg3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,19 @@ function calc_error_norms(func, u, t, analyzer,
return l2_error, linf_error
end

function calc_error_norms(func, _u, t, analyzer,
function calc_error_norms(func, u, t, analyzer,
mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}},
equations, initial_condition,
dg::DGSEM, cache, cache_analysis)
@unpack vandermonde, weights = analyzer
@unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis
@unpack node_coordinates, inverse_jacobian = cache.elements

# TODO GPU AnalysisCallback currently lives on CPU
backend = trixi_backend(_u)
if backend isa Nothing # TODO GPU KA CPU backend
@unpack node_coordinates, inverse_jacobian = cache.elements
u = _u
else
node_coordinates = Array(cache.elements.node_coordinates)
inverse_jacobian = Array(cache.elements.inverse_jacobian)
u = Array(_u)
# Calculate error norms on the CPU, to ensure the order of summation is the same.
if trixi_backend(u) !== nothing
node_coordinates = Array(node_coordinates)
inverse_jacobian = Array(inverse_jacobian)
u = Array(u)
end

# Set up data structures
Expand Down Expand Up @@ -389,22 +386,22 @@ function integrate_via_indices(func::Func, u,
return integral
end

function integrate_via_indices(func::Func, _u,
function integrate_via_indices(func::Func, u,
mesh::Union{StructuredMesh{3}, P4estMesh{3},
T8codeMesh{3}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
# TODO GPU AnalysiCallback currently lives on CPU
backend = trixi_backend(_u)
if backend isa Nothing # TODO GPU KA CPU backend
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements
u = _u
else
weights = Array(dg.basis.weights)
inverse_jacobian = Array(cache.elements.inverse_jacobian)
u = Array(_u)
end
return integrate_via_indices(func, trixi_backend(u), u, mesh, equations, dg, cache,
args...; normalize = normalize)
end

function integrate_via_indices(func::Func, ::Nothing, u,
mesh::Union{StructuredMesh{3}, P4estMesh{3},
T8codeMesh{3}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements

# Initialize integral with zeros of the right shape
integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...))
Expand All @@ -429,6 +426,51 @@ function integrate_via_indices(func::Func, _u,
return integral
end

function integrate_via_indices(func::Func, backend::Backend, u,
mesh::Union{StructuredMesh{3}, P4estMesh{3},
T8codeMesh{3}},
equations, dg::DGSEM, cache,
args...; normalize = true) where {Func}
@unpack weights = dg.basis
@unpack inverse_jacobian = cache.elements

function local_plus((integral, total_volume), (integral_element, volume_element))
return (integral + integral_element, total_volume + volume_element)
end

# `func(u, 1,1,1,1, equations, dg, args...)` might access GPU memory
# so we have to rely on the compiler to correctly infer the type of the integral here.
# TODO: Technically we need device_promote_op here that "infers" the function within the context of the GPU.
integral0 = zero(Base.promote_op(func, typeof(u), Int, Int, Int, Int,
typeof(equations), typeof(dg),
map(typeof, args)...))
init = neutral = (integral0, zero(real(mesh)))

# Use quadrature to numerically integrate over entire domain
num_elements = nelements(dg, cache)
integral, total_volume = AcceleratedKernels.mapreduce(local_plus, 1:num_elements,
backend; init,
neutral) do element
# Initialize integral with zeros of the right shape
local_integral, local_total_volume = neutral

for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
volume_jacobian = abs(inv(inverse_jacobian[i, j, k, element]))
local_integral += volume_jacobian * weights[i] * weights[j] * weights[k] *
func(u, i, j, k, element, equations, dg, args...)
local_total_volume += volume_jacobian * weights[i] * weights[j] * weights[k]
end
return local_integral, local_total_volume
end

# Normalize with total volume
if normalize
integral = integral / total_volume
end

return integral
end

function integrate(func::Func, u,
mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3},
T8codeMesh{3}},
Expand Down Expand Up @@ -460,18 +502,10 @@ function integrate(func::Func, u,
end
end

function analyze(::typeof(entropy_timederivative), _du, u, t,
function analyze(::typeof(entropy_timederivative), du, u, t,
mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3},
T8codeMesh{3}},
equations, dg::Union{DGSEM, FDSBP}, cache)
# TODO GPU AnalysiCallback currently lives on CPU
backend = trixi_backend(u)
if backend isa Nothing # TODO GPU KA CPU backend
du = _du
else
du = Array(_du)
end

# Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ
integrate_via_indices(u, mesh, equations, dg, cache,
du) do u, i, j, k, element, equations, dg, du
Expand Down
36 changes: 15 additions & 21 deletions src/callbacks_step/stepsize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,31 +205,25 @@ function calc_max_scaled_speed(backend::Nothing, u, mesh, constant_speed, equati
return max_scaled_speed
end

function calc_max_scaled_speed(backend::Backend, u, mesh, constant_speed, equations, dg,
cache)
function calc_max_scaled_speed(backend::Backend, u, ::MeshT, constant_speed, equations,
dg,
cache) where {MeshT}
@unpack contravariant_vectors, inverse_jacobian = cache.elements

num_elements = nelements(dg, cache)
max_scaled_speeds = allocate(backend, eltype(u), num_elements)

kernel! = max_scaled_speed_KAkernel!(backend)
kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg,
contravariant_vectors,
inverse_jacobian;
ndrange = num_elements)

return maximum(max_scaled_speeds)
end
init = neutral = AcceleratedKernels.neutral_element(Base.max, eltype(u))

# Provide a custom neutral and init element since we "reduce" over 1:num_elements
max_scaled_speed = AcceleratedKernels.mapreduce(Base.max, 1:num_elements, backend;
init, neutral) do element
max_scaled_speed_per_element(u, MeshT, constant_speed,
equations, dg,
contravariant_vectors,
inverse_jacobian,
element)
end

@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, MeshT, constant_speed,
equations,
dg, contravariant_vectors, inverse_jacobian)
element = @index(Global)
max_scaled_speeds[element] = max_scaled_speed_per_element(u, MeshT, constant_speed,
equations, dg,
contravariant_vectors,
inverse_jacobian,
element)
return max_scaled_speed
end

include("stepsize_dg1d.jl")
Expand Down
Loading