diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 00000000000..344b8eacc3a --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,21 @@ +env: + +steps: + - label: "CUDA Julia {{matrix.version}}" + matrix: + setup: + version: + - "1.10" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.version}}" + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "CUDA" + agents: + queue: "juliagpu" + cuda: "*" + if: build.message !~ /\[skip ci\]/ + timeout_in_minutes: 60 + soft_fail: + - exit_status: 3 diff --git a/Project.toml b/Project.toml index ed3f19e910b..2ba77fa3158 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.11.12-DEV" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -56,14 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" +TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" +Adapt = "4" +CUDA = "5" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/docs/make.jl b/docs/make.jl index c65a20133a8..5277852cac2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -163,7 +163,8 @@ makedocs( "Style guide" => "styleguide.md", "Testing" => "testing.md", "Performance" => "performance.md", - "Parallelization" => "parallelization.md" + "Parallelization" => "parallelization.md", + "Heterogeneous" => "heterogeneous.md" ], "Troubleshooting and FAQ" => "troubleshooting.md", "Reference" => [ diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md new file mode 100644 
index 00000000000..60bda029a40 --- /dev/null +++ b/docs/src/heterogeneous.md @@ -0,0 +1,82 @@ +# Heterogeneous computing + +Support for heterogeneous computing is currently being worked on. + +## The use of Adapt.jl + +[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for +the translation of nested data structures. The primary goal is to allow the substitution of `Array` +at the storage leaves with a GPU array like `CuArray`. + +To facilitate this, data structures must be parameterized, so instead of: + +```julia +struct Container + data::Array{Float64,2} +end +``` + +They must be written as: + +```julia +struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D +end +``` + +furthermore, we need to define a function that allows for the conversion of storage +of our types: + +```julia +function Adapt.adapt_structure(to, C::Container) + return Container(adapt(to, C.data)) +end +``` + +or simply + +```julia +Adapt.@adapt_structure(Container) +``` + +additionally, we must define `Adapt.parent_type`. + +```julia +function Adapt.parent_type(::Type{<:Container{D}}) where D + return D +end +``` + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +Array + +julia> using CUDA + +julia> GPU_C = adapt(CuArray, C) +Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(GPU_C) +CuArray +``` + +## Element-type conversion with `Trixi.trixi_adapt`. + +We can use `Trixi.trixi_adapt` to perform both an element-type and a storage-type adaptation: + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(Array, Float32, C) +Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(CuArray, Float32, C) +Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0]) +``` + +!!! 
note + `adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`. \ No newline at end of file diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..681d2f53a1e --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,11 @@ +# Package extension for adding CUDA-based features to Trixi.jl +module TrixiCUDAExt + +import CUDA: CuArray +import Trixi + +function Trixi.storage_type(::Type{<:CuArray}) + return CuArray +end + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index 84ca91b3070..d5ff21192c4 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -44,6 +44,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using DelimitedFiles: readdlm using Downloads: Downloads +using Adapt: Adapt, adapt using CodeTracking: CodeTracking using ConstructionBase: ConstructionBase using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array @@ -125,6 +126,7 @@ include("basic_types.jl") # Include all top-level source files include("auxiliary/auxiliary.jl") +include("auxiliary/vector_of_arrays.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 90650f6abcf..5738467ec6b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,88 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Trixi storage types must implement these two Adapt.jl methods +function Adapt.adapt_structure(to, c::AbstractContainer) + error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))") +end + +function Adapt.parent_type(C::Type{<:AbstractContainer}) + error("Interface: Must implement Adapt.parent_type(::Type{$C}") +end + +function Adapt.unwrap_type(C::Type{<:AbstractContainer}) + return Adapt.unwrap_type(Adapt.parent_type(C)) +end + +# TODO: Upstream to Adapt +function storage_type(x) + return 
storage_type(typeof(x)) +end + +function storage_type(T::Type) + error("Interface: Must implement storage_type(::Type{$T}") +end + +function storage_type(::Type{<:Array}) + Array +end + +function storage_type(C::Type{<:AbstractContainer}) + return storage_type(Adapt.unwrap_type(C)) +end + +# For some storage backends like CUDA.jl, empty arrays do seem to simply be +# null pointers which can cause `unsafe_wrap` to fail when calling +# Adapt.adapt (ArgumentError, see +# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229). +# To circumvent this, on length zero arrays this allocates +# a separate empty array instead of wrapping. +# However, since zero length arrays are not used in calculations, +# it should be okay if the underlying storage vectors and wrapped arrays +# are not the same as long as they are properly wrapped when `resize!`d etc. +function unsafe_wrap_or_alloc(to, vector, size) + if length(vector) == 0 + return similar(vector, size) + else + return unsafe_wrap(to, pointer(vector), size) + end +end + +struct TrixiAdaptor{Storage, Real} end + +function trixi_adapt(storage, real, x) + adapt(TrixiAdaptor{storage, real}(), x) +end + +# Custom rules +# 1. handling of StaticArrays +function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real}, + x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N} + StaticArrays.similar_type(x, Real)(x) +end + +# 2. Handling of Arrays +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: AbstractFloat} + adapt(Storage{Real}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: StaticArrays.StaticArray} + adapt(Storage{StaticArrays.similar_type(T, Real)}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray) where {Storage, Real} + adapt(Storage, x) +end + +# 3. TODO: Should we have a fallback? 
But that would imply implementing things for NamedTuple again + +function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} + return unsafe_wrap_or_alloc(Storage, vec, size) +end end # @muladd diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..0fa8dd7f1ec --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,31 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. +struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return VecOfArrays([Adapt.adapt(to, arr) for arr in v.arrays]) +end +function Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} + return T +end +function Adapt.unwrap_type(A::Type{<:VecOfArrays}) + Adapt.unwrap_type(Adapt.parent_type(A)) +end +function Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) + VecOfArrays(v) +end +end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index c909196b5db..f86be5dc069 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -27,25 +27,6 @@ mutable struct 
SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, solver::Solver cache::Cache performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, - BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache, performance_counter) - end end """ @@ -74,6 +55,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = PerformanceCounter() + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -81,9 +64,13 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, + performance_counter) end +# @eval due to @muladd +@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic) + # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. 
Instead, `SciMLBase.remake` has exactly the diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 20b989da334..28774e0029a 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -400,6 +400,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} volume_integral::VolumeIntegral end +# @eval due to @muladd +@eval Adapt.@adapt_structure(DG) + function Base.show(io::IO, dg::DG) @nospecialize dg # reduce precompilation time diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 0ba32565fbb..90c094e427d 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -34,6 +34,32 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + inverse_vandermonde_legendre = adapt(to, basis.inverse_vandermonde_legendre) + RealT = eltype(inverse_vandermonde_legendre) + + nodes = SVector{<:Any, RealT}(basis.nodes) + weights = SVector{<:Any, RealT}(basis.weights) + inverse_weights = SVector{<:Any, RealT}(basis.inverse_weights) + boundary_interpolation = adapt(to, basis.boundary_interpolation) + derivative_matrix = adapt(to, basis.derivative_matrix) + derivative_split = adapt(to, basis.derivative_split) + derivative_split_transpose = adapt(to, basis.derivative_split_transpose) + derivative_dhat = adapt(to, basis.derivative_dhat) + return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + function LobattoLegendreBasis(RealT, polydeg::Integer) nnodes_ = polydeg + 1 @@ -155,6 +181,17 @@ struct LobattoLegendreMortarL2{RealT <: Real, NNODES, reverse_lower::ReverseMatrix end 
+function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = adapt(to, mortar.forward_upper) + forward_lower = adapt(to, mortar.forward_lower) + reverse_upper = adapt(to, mortar.reverse_upper) + reverse_lower = adapt(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{eltype(forward_upper), nnodes(mortar), + typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + function MortarL2(basis::LobattoLegendreBasis) RealT = real(basis) nnodes_ = nnodes(basis) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index a070db6b701..68e5b3d758b 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,31 @@ #! format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}} <: + AbstractContainer # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
- jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype end @inline function nelements(elements::P4estElementContainer) @@ -36,7 +42,7 @@ end RealT, uEltype } - uEltype + return uEltype end # Only one-dimensional `Array`s are `resize!`able in Julia. 
@@ -51,28 +57,30 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + elements.contravariant_vectors = unsafe_wrap(ArrayType, + pointer(_contravariant_vectors), size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., n_dims * 2, capacity)) @@ -117,33 +125,104 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, 
_jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +function Adapt.parent_type(::Type{<:P4estElementContainer{<:Any, <:Any, <:Any, <:Any, + <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS}) where {NDIMS} + # Adapt underlying storage + _node_coordinates = adapt(to, elements._node_coordinates) + _jacobian_matrix = adapt(to, elements._jacobian_matrix) + _contravariant_vectors = adapt(to, elements._contravariant_vectors) + _inverse_jacobian = adapt(to, elements._inverse_jacobian) + _surface_flux_values = adapt(to, elements._surface_flux_values) + + RealT = eltype(_inverse_jacobian) + uEltype = eltype(_surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = (NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + 
typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values)) # VectoruEltype + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -152,17 +231,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = storage_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -189,10 +271,15 @@ function init_interfaces(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -205,21 +292,58 @@ function init_interfaces!(interfaces, mesh::Union{P4estMesh, P4estMeshView}) return interfaces end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: +function Adapt.parent_type(::Type{<:P4estInterfaceContainer{<:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT 
+end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = adapt(to, interfaces._u) + _neighbor_ids = adapt(to, interfaces._neighbor_ids) + _node_indices = adapt(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, + size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, + size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -228,9 +352,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = storage_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -263,9 +388,11 @@ function init_boundaries(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -312,6 +439,25 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +function Adapt.parent_type(::Type{<:P4estBoundaryContainer{<:Any, <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = adapt(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = adapt(to, boundaries.neighbor_ids) + node_indices = adapt(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(_u), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in 
`neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -337,20 +483,32 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -359,18 +517,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = storage_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -398,12 +557,15 @@ function init_mortars(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equatio _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -418,6 +580,34 @@ function init_mortars!(mortars, mesh::Union{P4estMesh, P4estMeshView}) return mortars end +function Adapt.parent_type(::Type{<:P4estMortarContainer{<:Any, <:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing 
memory +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = adapt(to, mortars._u) + _neighbor_ids = adapt(to, mortars._neighbor_ids) + _node_indices = adapt(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..cb9cd1ffc95 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,19 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +31,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = storage_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -64,11 +69,13 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +88,32 @@ function 
init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +function Adapt.parent_type(::Type{<:Trixi.P4estMPIInterfaceContainer{<:Any, <:Any, + <:Any, A}}) where {A} + return A +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = adapt(to, mpi_interfaces._u) + local_neighbor_ids = adapt(to, mpi_interfaces.local_neighbor_ids) + node_indices = adapt(to, mpi_interfaces.node_indices) + local_sides = adapt(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,14 +121,17 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. 
mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}} <: + AbstractContainer + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector _node_indices::Vector{NTuple{NDIMS, Symbol}} _normal_directions::Vector{RealT} end @@ -164,11 +200,12 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +221,33 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # TODO: Vector of Vector type data structure does not work on GPUs, + # must be 
redesigned. This skeleton implementation here just exists + for compatibility with the rest of the KA.jl solver code + + _u = adapt(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, + normal_directions, _u, + _node_indices, + _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 2cc201dd1f0..7acddf07b4b 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,13 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, + VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,25 +26,29 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +# @eval due to @muladd +@eval Adapt.@adapt_structure(P4estMPICache) ## # Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10. @@ -265,16 +270,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -286,6 +291,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0cb3bd7f409..d6cf6e1ce6d 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -119,4 +121,7 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return 
boundary_types_container end + +# @eval due to @muladd +@eval Adapt.@adapt_structure(UnstructuredSortedBoundaryTypes) end # @muladd diff --git a/test/Project.toml b/test/Project.toml index 8651b3d4d22..6c67d2a82df 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" diff --git a/test/runtests.jl b/test/runtests.jl index a9dfc4cb999..d08ff018837 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -116,4 +116,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" include("test_paper_self_gravitating_gas_dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" + import CUDA + if CUDA.functional() + include("test_cuda.jl") + else + @warn "Unable to run CUDA tests on this machine" + end + end end diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 9b3f2d67903..154088995ca 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -10,6 +10,7 @@ include("test_trixi.jl") @timed_testset "Aqua.jl" begin Aqua.test_all(Trixi, ambiguities = false, + unbound_args = false, # FIXME: UnstructuredSortedBoundaryTypes # exceptions necessary for adding a new method `StartUpDG.estimate_h` # in src/solvers/dgmulti/sbp.jl piracies = (treat_as_own = [Trixi.StartUpDG.RefElemData, diff --git a/test/test_cuda.jl b/test/test_cuda.jl new file mode 100644 index 00000000000..f2fd11233c6 --- /dev/null +++ b/test/test_cuda.jl @@ -0,0 +1,20 @@ +module TestCUDA + +using CUDA +using Test +using Trixi + 
+include("test_trixi.jl") + +# EXAMPLES_DIR = joinpath(examples_dir(), "dgmulti_1d") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +# TODO: + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index e14adc69283..1f5735ddc35 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -27,6 +27,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_advection_nonconforming_flag.jl" begin diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl index 259eb39c545..c3291c3ba9d 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -2,6 +2,7 @@ module TestExamplesUnstructuredMesh2D using Test using Trixi +using Adapt include("test_trixi.jl") @@ -32,6 +33,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_euler_free_stream.jl" begin