Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
env:

steps:
- label: "CUDA Julia {{matrix.version}}"
matrix:
setup:
version:
- "1.10"
plugins:
- JuliaCI/julia#v1:
version: "{{matrix.version}}"
- JuliaCI/julia-test#v1: ~
env:
TRIXI_TEST: "CUDA"
agents:
queue: "juliagpu"
cuda: "*"
if: build.message !~ /\[skip ci\]/
timeout_in_minutes: 60
soft_fail:
- exit_status: 3
5 changes: 5 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.11.12-DEV"

[deps]
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Expand Down Expand Up @@ -56,14 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"

[extensions]
TrixiConvexECOSExt = ["Convex", "ECOS"]
TrixiMakieExt = "Makie"
TrixiNLsolveExt = "NLsolve"
TrixiCUDAExt = "CUDA"

[compat]
Accessors = "0.1.36"
Adapt = "4"
CUDA = "5"
CodeTracking = "1.0.5"
ConstructionBase = "1.5"
Convex = "0.16"
Expand Down
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ makedocs(
"Style guide" => "styleguide.md",
"Testing" => "testing.md",
"Performance" => "performance.md",
"Parallelization" => "parallelization.md"
"Parallelization" => "parallelization.md",
"Heterogeneous" => "heterogeneous.md"
],
"Troubleshooting and FAQ" => "troubleshooting.md",
"Reference" => [
Expand Down
82 changes: 82 additions & 0 deletions docs/src/heterogeneous.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Heterogeneous computing

Support for heterogeneous computing is currently being worked on.

## The use of Adapt.jl

[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for
the translation of nested data structures. The primary goal is to allow the substitution of `Array`
at the storage leaves with a GPU array like `CuArray`.

To facilitate this, data structures must be parameterized, so instead of:

```julia
struct Container
data::Array{Float64,2}
end
```

They must be written as:

```julia
struct Container{D<:AbstractArray} <: Trixi.AbstractContainer
data::D
end
```

Furthermore, we need to define a function that allows for the conversion of storage
of our types:

```julia
function Adapt.adapt_structure(to, C::Container)
return Container(adapt(to, C.data))
end
```

or simply

```julia
Adapt.@adapt_structure(Container)
```

Additionally, we must define `Adapt.parent_type`:

```julia
function Adapt.parent_type(::Type{<:Container{D}}) where D
return D
end
```

```julia-repl
julia> C = Container(zeros(3))
Container{Vector{Float64}}([0.0, 0.0, 0.0])

julia> Trixi.storage_type(C)
Array

julia> using CUDA

julia> GPU_C = adapt(CuArray, C)
Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0])

julia> Trixi.storage_type(GPU_C)
CuArray
```

## Element-type conversion with `Trixi.trixi_adapt`

We can use `Trixi.trixi_adapt` to perform both an element-type and a storage-type adaptation:

```julia-repl
julia> C = Container(zeros(3))
Container{Vector{Float64}}([0.0, 0.0, 0.0])

julia> Trixi.trixi_adapt(Array, Float32, C)
Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0])

julia> Trixi.trixi_adapt(CuArray, Float32, C)
Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0])
```

!!! note
`adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`.
11 changes: 11 additions & 0 deletions ext/TrixiCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Package extension for adding CUDA-based features to Trixi.jl
module TrixiCUDAExt

import CUDA: CuArray
import Trixi

# Report `CuArray` as the storage backend for every `CuArray` subtype,
# irrespective of element type, dimensionality, or memory kind.
Trixi.storage_type(::Type{<:CuArray}) = CuArray

end
2 changes: 2 additions & 0 deletions src/Trixi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!,

using DelimitedFiles: readdlm
using Downloads: Downloads
using Adapt: Adapt, adapt
using CodeTracking: CodeTracking
using ConstructionBase: ConstructionBase
using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array
Expand Down Expand Up @@ -125,6 +126,7 @@ include("basic_types.jl")

# Include all top-level source files
include("auxiliary/auxiliary.jl")
include("auxiliary/vector_of_arrays.jl")
include("auxiliary/mpi.jl")
include("auxiliary/p4est.jl")
include("auxiliary/t8code.jl")
Expand Down
84 changes: 84 additions & 0 deletions src/auxiliary/containers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,88 @@ end
# Convenience method: copy the element at index `from` to index `destination`
# within the same container `c`, delegating to the two-container,
# five-argument `raw_copy!` method.
function raw_copy!(c::AbstractContainer, from::Int, destination::Int)
    raw_copy!(c, c, from, from, destination)
end

# Trixi storage types must implement these two Adapt.jl methods
# (`Adapt.adapt_structure` and `Adapt.parent_type`). This fallback fails
# loudly for container types that have not opted into the interface.
function Adapt.adapt_structure(to, c::AbstractContainer)
    error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))")
end

# Fallback for the required `Adapt.parent_type` interface method; fails
# loudly for container types that have not opted in.
# Fix: the error message previously lacked the closing parenthesis after
# `::Type{$C}`, producing an unbalanced method signature in the message.
function Adapt.parent_type(C::Type{<:AbstractContainer})
    error("Interface: Must implement Adapt.parent_type(::Type{$C})")
end

# Recursively unwrap a container type until the underlying storage array
# type is reached (delegates one level via `Adapt.parent_type`).
function Adapt.unwrap_type(C::Type{<:AbstractContainer})
    Adapt.unwrap_type(Adapt.parent_type(C))
end

# TODO: Upstream to Adapt
"""
    storage_type(x)
    storage_type(T::Type)

Return the array type (e.g. `Array` or, via the `TrixiCUDAExt` extension,
`CuArray`) that backs the storage of `x`. Types must opt in by defining a
method for their own `Type`; the generic `Type` fallback throws an error.
"""
function storage_type(x)
    return storage_type(typeof(x))
end

# Fallback: fail loudly for types that do not declare their storage backend.
# Fix: the error message previously lacked the closing parenthesis after
# `::Type{$T}`.
function storage_type(T::Type)
    error("Interface: Must implement storage_type(::Type{$T})")
end

# Plain CPU `Array`s are their own storage backend.
function storage_type(::Type{<:Array})
    Array
end

# A container reports the storage backend of its innermost parent array.
function storage_type(C::Type{<:AbstractContainer})
    storage_type(Adapt.unwrap_type(C))
end

# Some storage backends (e.g. CUDA.jl) appear to represent empty arrays as
# null pointers, which can make `unsafe_wrap` fail with an `ArgumentError`
# when called via `Adapt.adapt` (see
# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229).
# To circumvent this, allocate a fresh empty array instead of wrapping
# whenever the backing vector has length zero. Since zero-length arrays take
# no part in calculations, it is acceptable for the underlying storage
# vector and the wrapped array to differ, as long as they are properly
# re-wrapped on `resize!` etc.
function unsafe_wrap_or_alloc(to, vector, size)
    if isempty(vector)
        return similar(vector, size)
    end
    return unsafe_wrap(to, pointer(vector), size)
end

# Adaptor carrying both the target storage backend (e.g. `Array`, `CuArray`)
# and the target real/element type as type parameters.
struct TrixiAdaptor{Storage, Real} end

"""
    trixi_adapt(storage, real, x)

Adapt `x` to the given `storage` backend and `real` element type in a
single pass, using the custom `TrixiAdaptor` rules.
"""
function trixi_adapt(storage, real, x)
    return adapt(TrixiAdaptor{storage, real}(), x)
end

# Custom rules
# NOTE: In the methods below, `Real` and `Storage` are type *parameters*
# extracted from the `TrixiAdaptor`; in particular `Real` shadows
# `Base.Real` inside these method bodies.
# 1. handling of StaticArrays
# A bare static array only has its element type converted to the adaptor's
# `Real`; its static size is preserved and no storage conversion applies.
function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real},
x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N}
StaticArrays.similar_type(x, Real)(x)
end

# 2. Handling of Arrays
# Arrays of floating point numbers: convert both the storage backend and
# the element type, e.g. `Vector{Float64} -> CuArray{Float32}`.
function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real},
x::AbstractArray{T}) where {Storage, Real,
T <: AbstractFloat}
adapt(Storage{Real}, x)
end

# Arrays whose elements are themselves static arrays: convert the storage
# backend and the *inner* element type of each static array.
function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real},
x::AbstractArray{T}) where {Storage, Real,
T <: StaticArrays.StaticArray}
adapt(Storage{StaticArrays.similar_type(T, Real)}, x)
end

# All other arrays (e.g. integer index arrays): convert only the storage
# backend and leave the element type untouched.
function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real},
x::AbstractArray) where {Storage, Real}
adapt(Storage, x)
end

# 3. TODO: Should we have a fallback? But that would imply implementing things for NamedTuple again

# Unwrap the adaptor to its storage backend before wrapping/allocating.
function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vector, dims) where {Storage}
    return unsafe_wrap_or_alloc(Storage, vector, dims)
end
end # @muladd
31 changes: 31 additions & 0 deletions src/auxiliary/vector_of_arrays.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
#! format: noindent

# Wrapper around a `Vector` of arrays that forwards common `Base` queries
# to the wrapped vector. In contrast to a plain `Vector` of arrays, it
# implements `Adapt.adapt_structure`, which makes offloading of the
# contained arrays to the GPU possible.
struct VecOfArrays{T <: AbstractArray}
    arrays::Vector{T}
end

# Forward element access and shape queries to the underlying `Vector`.
Base.getindex(v::VecOfArrays, i::Int) = v.arrays[i]
Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays)
Base.size(v::VecOfArrays) = Base.size(v.arrays)
Base.length(v::VecOfArrays) = Base.length(v.arrays)
Base.eltype(::VecOfArrays{T}) where {T} = T

# Adapt every contained array individually, e.g. `Array -> CuArray`.
function Adapt.adapt_structure(to, v::VecOfArrays)
    return VecOfArrays(map(a -> Adapt.adapt(to, a), v.arrays))
end

# The "parent" storage of a `VecOfArrays` is the contained array type.
Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} = T
Adapt.unwrap_type(A::Type{<:VecOfArrays}) = Adapt.unwrap_type(Adapt.parent_type(A))

# Allow implicit construction from a plain `Vector` of arrays.
Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) = VecOfArrays(v)
end # @muladd
27 changes: 7 additions & 20 deletions src/semidiscretization/semidiscretization_hyperbolic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,6 @@ mutable struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition,
solver::Solver
cache::Cache
performance_counter::PerformanceCounter

function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition,
BoundaryConditions, SourceTerms, Solver,
Cache}(mesh::Mesh, equations::Equations,
initial_condition::InitialCondition,
boundary_conditions::BoundaryConditions,
source_terms::SourceTerms,
solver::Solver,
cache::Cache) where {Mesh, Equations,
InitialCondition,
BoundaryConditions,
SourceTerms,
Solver,
Cache}
performance_counter = PerformanceCounter()

new(mesh, equations, initial_condition, boundary_conditions, source_terms,
solver, cache, performance_counter)
end
end

"""
Expand Down Expand Up @@ -74,16 +55,22 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver

check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions)

performance_counter = PerformanceCounter()

SemidiscretizationHyperbolic{typeof(mesh), typeof(equations),
typeof(initial_condition),
typeof(_boundary_conditions), typeof(source_terms),
typeof(solver), typeof(cache)}(mesh, equations,
initial_condition,
_boundary_conditions,
source_terms, solver,
cache)
cache,
performance_counter)
end

# @eval due to @muladd
@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic)

# Create a new semidiscretization but change some parameters compared to the input.
# `Base.similar` follows a related concept but would require us to `copy` the `mesh`,
# which would impact the performance. Instead, `SciMLBase.remake` has exactly the
Expand Down
3 changes: 3 additions & 0 deletions src/solvers/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral}
volume_integral::VolumeIntegral
end

# @eval due to @muladd
@eval Adapt.@adapt_structure(DG)

function Base.show(io::IO, dg::DG)
@nospecialize dg # reduce precompilation time

Expand Down
Loading
Loading