diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 00000000000..344b8eacc3a --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,21 @@ +env: + +steps: + - label: "CUDA Julia {{matrix.version}}" + matrix: + setup: + version: + - "1.10" + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.version}}" + - JuliaCI/julia-test#v1: ~ + env: + TRIXI_TEST: "CUDA" + agents: + queue: "juliagpu" + cuda: "*" + if: build.message !~ /\[skip ci\]/ + timeout_in_minutes: 60 + soft_fail: + - exit_status: 3 diff --git a/Project.toml b/Project.toml index ed3f19e910b..2ba77fa3158 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.11.12-DEV" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -56,14 +57,18 @@ Convex = "f65535da-76fb-5f13-bab9-19810c17039a" ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] TrixiConvexECOSExt = ["Convex", "ECOS"] TrixiMakieExt = "Makie" TrixiNLsolveExt = "NLsolve" +TrixiCUDAExt = "CUDA" [compat] Accessors = "0.1.36" +Adapt = "4" +CUDA = "5" CodeTracking = "1.0.5" ConstructionBase = "1.5" Convex = "0.16" diff --git a/docs/make.jl b/docs/make.jl index c65a20133a8..5277852cac2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -163,7 +163,8 @@ makedocs( "Style guide" => "styleguide.md", "Testing" => "testing.md", "Performance" => "performance.md", - "Parallelization" => "parallelization.md" + "Parallelization" => "parallelization.md", + "Heterogeneous" => "heterogeneous.md" ], "Troubleshooting and FAQ" => "troubleshooting.md", "Reference" => [ diff --git a/docs/src/heterogeneous.md b/docs/src/heterogeneous.md new file mode 100644 
index 00000000000..60bda029a40 --- /dev/null +++ b/docs/src/heterogeneous.md @@ -0,0 +1,82 @@ +# Heterogeneous computing + +Support for heterogeneous computing is currently being worked on. + +## The use of Adapt.jl + +[`Adapt.jl`](https://github.com/JuliaGPU/Adapt.jl) is a package in the JuliaGPU family that allows for +the translation of nested data structures. The primary goal is to allow the substitution of `Array` +at the storage leaves with a GPU array like `CuArray`. + +To facilitate this, data structures must be parameterized, so instead of: + +```julia +struct Container + data::Array{Float64,2} +end +``` + +They must be written as: + +```julia +struct Container{D<:AbstractArray} <: Trixi.AbstractContainer + data::D +end +``` + +furthermore, we need to define a function that allows for the conversion of storage +of our types: + +```julia +function Adapt.adapt_structure(to, C::Container) + return Container(adapt(to, C.data)) +end +``` + +or simply + +```julia +Adapt.@adapt_structure(Container) +``` + +additionally, we must define `Adapt.parent_type`. + +```julia +function Adapt.parent_type(::Type{<:Container{D}}) where D + return D +end +``` + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(C) +Array + +julia> using CUDA + +julia> GPU_C = adapt(CuArray, C) +Container{CuArray{Float64, 1, CUDA.DeviceMemory}}([0.0, 0.0, 0.0]) + +julia> Trixi.storage_type(GPU_C) +CuArray +``` + +## Element-type conversion with `Trixi.trixi_adapt`. + +We can use `Trixi.trixi_adapt` to perform both an element-type and a storage-type adaptation: + +```julia-repl +julia> C = Container(zeros(3)) +Container{Vector{Float64}}([0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(Array, Float32, C) +Container{Vector{Float32}}(Float32[0.0, 0.0, 0.0]) + +julia> Trixi.trixi_adapt(CuArray, Float32, C) +Container{CuArray{Float32, 1, CUDA.DeviceMemory}}(Float32[0.0, 0.0, 0.0]) +``` + +!!! 
note + `adapt(Array{Float32}, C)` is tempting but will do the wrong thing in the presence of `StaticArrays`. \ No newline at end of file diff --git a/ext/TrixiCUDAExt.jl b/ext/TrixiCUDAExt.jl new file mode 100644 index 00000000000..681d2f53a1e --- /dev/null +++ b/ext/TrixiCUDAExt.jl @@ -0,0 +1,11 @@ +# Package extension for adding CUDA-based features to Trixi.jl +module TrixiCUDAExt + +import CUDA: CuArray +import Trixi + +function Trixi.storage_type(::Type{<:CuArray}) + return CuArray +end + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index 84ca91b3070..d5ff21192c4 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -44,6 +44,7 @@ import SciMLBase: get_du, get_tmp_cache, u_modified!, using DelimitedFiles: readdlm using Downloads: Downloads +using Adapt: Adapt, adapt using CodeTracking: CodeTracking using ConstructionBase: ConstructionBase using DiffEqBase: DiffEqBase, get_tstops, get_tstops_array @@ -125,6 +126,7 @@ include("basic_types.jl") # Include all top-level source files include("auxiliary/auxiliary.jl") +include("auxiliary/vector_of_arrays.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") include("auxiliary/t8code.jl") diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 90650f6abcf..5738467ec6b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -314,4 +314,88 @@ end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) raw_copy!(c, c, from, from, destination) end + +# Trixi storage types must implement these two Adapt.jl methods +function Adapt.adapt_structure(to, c::AbstractContainer) + error("Interface: Must implement Adapt.adapt_structure(to, ::$(typeof(c)))") +end + +function Adapt.parent_type(C::Type{<:AbstractContainer}) + error("Interface: Must implement Adapt.parent_type(::Type{$C}") +end + +function Adapt.unwrap_type(C::Type{<:AbstractContainer}) + return Adapt.unwrap_type(Adapt.parent_type(C)) +end + +# TODO: Upstream to Adapt +function storage_type(x) + return 
storage_type(typeof(x)) +end + +function storage_type(T::Type) + error("Interface: Must implement storage_type(::Type{$T}") +end + +function storage_type(::Type{<:Array}) + Array +end + +function storage_type(C::Type{<:AbstractContainer}) + return storage_type(Adapt.unwrap_type(C)) +end + +# For some storage backends like CUDA.jl, empty arrays do seem to simply be +# null pointers which can cause `unsafe_wrap` to fail when calling +# Adapt.adapt (ArgumentError, see +# https://github.com/JuliaGPU/CUDA.jl/blob/v5.4.2/src/array.jl#L212-L229). +# To circumvent this, on length zero arrays this allocates +# a separate empty array instead of wrapping. +# However, since zero length arrays are not used in calculations, +# it should be okay if the underlying storage vectors and wrapped arrays +# are not the same as long as they are properly wrapped when `resize!`d etc. +function unsafe_wrap_or_alloc(to, vector, size) + if length(vector) == 0 + return similar(vector, size) + else + return unsafe_wrap(to, pointer(vector), size) + end +end + +struct TrixiAdaptor{Storage, Real} end + +function trixi_adapt(storage, real, x) + adapt(TrixiAdaptor{storage, real}(), x) +end + +# Custom rules +# 1. handling of StaticArrays +function Adapt.adapt_storage(::TrixiAdaptor{<:Any, Real}, + x::StaticArrays.StaticArray{S, T, N}) where {Real, S, T, N} + StaticArrays.similar_type(x, Real)(x) +end + +# 2. Handling of Arrays +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: AbstractFloat} + adapt(Storage{Real}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray{T}) where {Storage, Real, + T <: StaticArrays.StaticArray} + adapt(Storage{StaticArrays.similar_type(T, Real)}, x) +end + +function Adapt.adapt_storage(::TrixiAdaptor{Storage, Real}, + x::AbstractArray) where {Storage, Real} + adapt(Storage, x) +end + +# 3. TODO: Should we have a fallback? 
But that would imply implementing things for NamedTuple again + +function unsafe_wrap_or_alloc(::TrixiAdaptor{Storage}, vec, size) where {Storage} + return unsafe_wrap_or_alloc(Storage, vec, size) +end end # @muladd diff --git a/src/auxiliary/vector_of_arrays.jl b/src/auxiliary/vector_of_arrays.jl new file mode 100644 index 00000000000..0fa8dd7f1ec --- /dev/null +++ b/src/auxiliary/vector_of_arrays.jl @@ -0,0 +1,31 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Wraps a Vector of Arrays, forwards `getindex` to the underlying Vector. +# Implements `Adapt.adapt_structure` to allow offloading to the GPU which is +# not possible for a plain Vector of Arrays. +struct VecOfArrays{T <: AbstractArray} + arrays::Vector{T} +end +Base.getindex(v::VecOfArrays, i::Int) = Base.getindex(v.arrays, i) +Base.IndexStyle(v::VecOfArrays) = Base.IndexStyle(v.arrays) +Base.size(v::VecOfArrays) = Base.size(v.arrays) +Base.length(v::VecOfArrays) = Base.length(v.arrays) +Base.eltype(v::VecOfArrays{T}) where {T} = T +function Adapt.adapt_structure(to, v::VecOfArrays) + return VecOfArrays([Adapt.adapt(to, arr) for arr in v.arrays]) +end +function Adapt.parent_type(::Type{<:VecOfArrays{T}}) where {T} + return T +end +function Adapt.unwrap_type(A::Type{<:VecOfArrays}) + Adapt.unwrap_type(Adapt.parent_type(A)) +end +function Base.convert(::Type{<:VecOfArrays}, v::Vector{<:AbstractArray}) + VecOfArrays(v) +end +end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index c909196b5db..f86be5dc069 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -27,25 +27,6 @@ mutable struct 
SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, solver::Solver cache::Cache performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, - BoundaryConditions, SourceTerms, Solver, - Cache}(mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, - cache::Cache) where {Mesh, Equations, - InitialCondition, - BoundaryConditions, - SourceTerms, - Solver, - Cache} - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache, performance_counter) - end end """ @@ -74,6 +55,8 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) + performance_counter = PerformanceCounter() + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), @@ -81,9 +64,13 @@ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver initial_condition, _boundary_conditions, source_terms, solver, - cache) + cache, + performance_counter) end +# @eval due to @muladd +@eval Adapt.@adapt_structure(SemidiscretizationHyperbolic) + # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. 
Instead, `SciMLBase.remake` has exactly the diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 20b989da334..28774e0029a 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -400,6 +400,9 @@ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} volume_integral::VolumeIntegral end +# @eval due to @muladd +@eval Adapt.@adapt_structure(DG) + function Base.show(io::IO, dg::DG) @nospecialize dg # reduce precompilation time diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index 0ba32565fbb..90c094e427d 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -34,6 +34,32 @@ struct LobattoLegendreBasis{RealT <: Real, NNODES, # negative adjoint wrt the SBP dot product end +function Adapt.adapt_structure(to, basis::LobattoLegendreBasis) + inverse_vandermonde_legendre = adapt(to, basis.inverse_vandermonde_legendre) + RealT = eltype(inverse_vandermonde_legendre) + + nodes = SVector{<:Any, RealT}(basis.nodes) + weights = SVector{<:Any, RealT}(basis.weights) + inverse_weights = SVector{<:Any, RealT}(basis.inverse_weights) + boundary_interpolation = adapt(to, basis.boundary_interpolation) + derivative_matrix = adapt(to, basis.derivative_matrix) + derivative_split = adapt(to, basis.derivative_split) + derivative_split_transpose = adapt(to, basis.derivative_split_transpose) + derivative_dhat = adapt(to, basis.derivative_dhat) + return LobattoLegendreBasis{RealT, nnodes(basis), typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, + weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + function LobattoLegendreBasis(RealT, polydeg::Integer) nnodes_ = polydeg + 1 @@ -155,6 +181,17 @@ struct LobattoLegendreMortarL2{RealT <: Real, NNODES, reverse_lower::ReverseMatrix end 
+function Adapt.adapt_structure(to, mortar::LobattoLegendreMortarL2) + forward_upper = adapt(to, mortar.forward_upper) + forward_lower = adapt(to, mortar.forward_lower) + reverse_upper = adapt(to, mortar.reverse_upper) + reverse_lower = adapt(to, mortar.reverse_lower) + return LobattoLegendreMortarL2{eltype(forward_upper), nnodes(mortar), + typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end + function MortarL2(basis::LobattoLegendreBasis) RealT = real(basis) nnodes_ = nnodes(basis) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index a070db6b701..68e5b3d758b 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -6,25 +6,31 @@ #! format: noindent mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer + NDIMSP2, NDIMSP3, + ArrayNDIMSP1 <: DenseArray{RealT, NDIMSP1}, + ArrayNDIMSP2 <: DenseArray{RealT, NDIMSP2}, + ArrayNDIMSP3 <: DenseArray{RealT, NDIMSP3}, + VectorRealT <: DenseVector{RealT}, + VectoruEltype <: DenseVector{uEltype}} <: + AbstractContainer # Physical coordinates at each node - node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + node_coordinates::ArrayNDIMSP2 # [orientation, node_i, node_j, node_k, element] # Jacobian matrix of the transformation # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
- jacobian_matrix::Array{RealT, NDIMSP3} + jacobian_matrix::ArrayNDIMSP3 # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + contravariant_vectors::ArrayNDIMSP3 # [dimension, index, node_i, node_j, node_k, element] # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + inverse_jacobian::ArrayNDIMSP1 # [node_i, node_j, node_k, element] # Buffer for calculated surface flux - surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + surface_flux_values::ArrayNDIMSP2 # [variable, i, j, direction, element] # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _jacobian_matrix::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian::Vector{RealT} - _surface_flux_values::Vector{uEltype} + _node_coordinates::VectorRealT + _jacobian_matrix::VectorRealT + _contravariant_vectors::VectorRealT + _inverse_jacobian::VectorRealT + _surface_flux_values::VectoruEltype end @inline function nelements(elements::P4estElementContainer) @@ -36,7 +42,7 @@ end RealT, uEltype } - uEltype + return uEltype end # Only one-dimensional `Array`s are `resize!`able in Julia. 
@@ -51,28 +57,30 @@ function Base.resize!(elements::P4estElementContainer, capacity) n_dims = ndims(elements) n_nodes = size(elements.node_coordinates, 2) n_variables = size(elements.surface_flux_values, 1) + ArrayType = storage_type(elements) resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + elements.node_coordinates = unsafe_wrap(ArrayType, pointer(_node_coordinates), (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + elements.jacobian_matrix = unsafe_wrap(ArrayType, pointer(_jacobian_matrix), (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + elements.contravariant_vectors = unsafe_wrap(ArrayType, + pointer(_contravariant_vectors), size(elements.jacobian_matrix)) resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + elements.inverse_jacobian = unsafe_wrap(ArrayType, pointer(_inverse_jacobian), (ntuple(_ -> n_nodes, n_dims)..., capacity)) resize!(_surface_flux_values, n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + elements.surface_flux_values = unsafe_wrap(ArrayType, pointer(_surface_flux_values), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., n_dims * 2, capacity)) @@ -117,33 +125,104 @@ function init_elements(mesh::Union{P4estMesh{NDIMS, NDIMS, RealT}, NDIMS * 2, nelements)) elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(node_coordinates, jacobian_matrix, - contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, 
_jacobian_matrix, - _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) + NDIMS + 3, Array{RealT, NDIMS + 1}, + Array{RealT, NDIMS + 2}, Array{RealT, NDIMS + 3}, + Vector{RealT}, Vector{uEltype}}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) init_elements!(elements, mesh, basis) return elements end -mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +function Adapt.parent_type(::Type{<:P4estElementContainer{<:Any, <:Any, <:Any, <:Any, + <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, + elements::P4estElementContainer{NDIMS}) where {NDIMS} + # Adapt underlying storage + _node_coordinates = adapt(to, elements._node_coordinates) + _jacobian_matrix = adapt(to, elements._jacobian_matrix) + _contravariant_vectors = adapt(to, elements._contravariant_vectors) + _inverse_jacobian = adapt(to, elements._inverse_jacobian) + _surface_flux_values = adapt(to, elements._surface_flux_values) + + RealT = eltype(_inverse_jacobian) + uEltype = eltype(_surface_flux_values) + + # Wrap arrays again + node_coordinates = unsafe_wrap_or_alloc(to, _node_coordinates, + size(elements.node_coordinates)) + jacobian_matrix = unsafe_wrap_or_alloc(to, _jacobian_matrix, + size(elements.jacobian_matrix)) + contravariant_vectors = unsafe_wrap_or_alloc(to, _contravariant_vectors, + size(jacobian_matrix)) + inverse_jacobian = unsafe_wrap_or_alloc(to, _inverse_jacobian, + size(elements.inverse_jacobian)) + surface_flux_values = unsafe_wrap_or_alloc(to, _surface_flux_values, + size(elements.surface_flux_values)) + + new_type_params = (NDIMS, + RealT, + uEltype, + NDIMS + 1, + NDIMS + 2, + NDIMS + 3, + typeof(inverse_jacobian), # ArrayNDIMSP1 + typeof(node_coordinates), # ArrayNDIMSP2 + 
typeof(jacobian_matrix), # ArrayNDIMSP3 + typeof(_node_coordinates), # VectorRealT + typeof(_surface_flux_values)) # VectoruEltype + return P4estElementContainer{new_type_params...}(node_coordinates, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values, + _node_coordinates, + _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, + _surface_flux_values) +end + +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids::Matrix{Int} # [primary/secondary, interface] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] + u::uArray # [primary/secondary, variable, i, j, interface] + neighbor_ids::IdsMatrix # [primary/secondary, interface] + node_indices::IndicesMatrix # [primary/secondary, interface] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline function ninterfaces(interfaces::P4estInterfaceContainer) size(interfaces.neighbor_ids, 2) end @inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estInterfaceContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) @@ -152,17 +231,20 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity) n_dims = ndims(interfaces) n_nodes = size(interfaces.u, 3) n_variables = size(interfaces.u, 2) + ArrayType = storage_type(interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), + interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + interfaces.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), + (2, capacity)) resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + interfaces.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), + (2, capacity)) return nothing end @@ -189,10 +271,15 @@ function init_interfaces(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, - node_indices, - _u, _neighbor_ids, - _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(neighbor_ids), + typeof(node_indices), typeof(_u), + typeof(_neighbor_ids), typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) init_interfaces!(interfaces, mesh) @@ -205,21 +292,58 @@ function init_interfaces!(interfaces, mesh::Union{P4estMesh, P4estMeshView}) return interfaces end -mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: +function Adapt.parent_type(::Type{<:P4estInterfaceContainer{<:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT 
+end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, interfaces::P4estInterfaceContainer) + # Adapt underlying storage + _u = adapt(to, interfaces._u) + _neighbor_ids = adapt(to, interfaces._neighbor_ids) + _node_indices = adapt(to, interfaces._node_indices) + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(interfaces.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, + size(interfaces.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, + size(interfaces.node_indices)) + + NDIMS = ndims(interfaces) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 2, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estInterfaceContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1, + uArray <: DenseArray{uEltype, NDIMSP1}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + u::uArray # [variables, i, j, boundary] + neighbor_ids::IdsVector # [boundary] + node_indices::IndicesVector # [boundary] name::Vector{Symbol} # [boundary] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nboundaries(boundaries::P4estBoundaryContainer) length(boundaries.neighbor_ids) end @inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estBoundaryContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) @@ -228,9 +352,10 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity) n_dims = ndims(boundaries) n_nodes = size(boundaries.u, 2) n_variables = size(boundaries.u, 1) + ArrayType = storage_type(boundaries) resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), + boundaries.u = unsafe_wrap(ArrayType, pointer(_u), (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -263,9 +388,11 @@ function init_boundaries(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equa node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, - node_indices, names, - _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, + node_indices, names, + _u) if n_boundaries > 0 init_boundaries!(boundaries, mesh) @@ -312,6 +439,25 @@ function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) return nothing end +function Adapt.parent_type(::Type{<:P4estBoundaryContainer{<:Any, <:Any, <:Any, ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, boundaries::P4estBoundaryContainer) + _u = adapt(to, boundaries._u) + u = unsafe_wrap_or_alloc(to, _u, size(boundaries.u)) + neighbor_ids = adapt(to, boundaries.neighbor_ids) + node_indices = adapt(to, boundaries.node_indices) + name = boundaries.name + + NDIMS = ndims(boundaries) + return P4estBoundaryContainer{NDIMS, eltype(_u), NDIMS + 1, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u)}(u, neighbor_ids, node_indices, + name, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in 
`neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -337,20 +483,32 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + IdsMatrix <: DenseMatrix{Int}, + IndicesMatrix <: + DenseMatrix{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}, + IdsVector <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids::Matrix{Int} # [position, mortar] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + u::uArray # [small/large side, variable, position, i, j, mortar] + neighbor_ids::IdsMatrix # [position, mortar] + node_indices::IndicesMatrix # [small/large, mortar] # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} - _node_indices::Vector{NTuple{NDIMS, Symbol}} + _u::uVector + _neighbor_ids::IdsVector + _node_indices::IndicesVector end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) @inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estMortarContainer{NDIMS, uEltype}) where {NDIMS, + uEltype} + uEltype +end # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) @@ -359,18 +517,19 @@ function Base.resize!(mortars::P4estMortarContainer, capacity) n_dims = ndims(mortars) n_nodes = size(mortars.u, 4) n_variables = size(mortars.u, 2) + ArrayType = storage_type(mortars) resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), + mortars.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, 2^(n_dims - 1), ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + mortars.neighbor_ids = unsafe_wrap(ArrayType, pointer(_neighbor_ids), (2^(n_dims - 1) + 1, capacity)) resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + mortars.node_indices = unsafe_wrap(ArrayType, pointer(_node_indices), (2, capacity)) return nothing end @@ -398,12 +557,15 @@ function init_mortars(mesh::Union{P4estMesh, P4estMeshView, T8codeMesh}, equatio _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, - neighbor_ids, - node_indices, - _u, - _neighbor_ids, - _node_indices) + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3, typeof(u), + typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), + typeof(_node_indices)}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) if n_mortars > 0 init_mortars!(mortars, mesh) @@ -418,6 +580,34 @@ function init_mortars!(mortars, mesh::Union{P4estMesh, P4estMeshView}) return mortars end +function Adapt.parent_type(::Type{<:P4estMortarContainer{<:Any, <:Any, <:Any, <:Any, + ArrayT}}) where {ArrayT} + ArrayT +end + +# Manual adapt_structure since we have aliasing 
memory +function Adapt.adapt_structure(to, mortars::P4estMortarContainer) + # Adapt underlying storage + _u = adapt(to, mortars._u) + _neighbor_ids = adapt(to, mortars._neighbor_ids) + _node_indices = adapt(to, mortars._node_indices) + + # Wrap arrays again + u = unsafe_wrap_or_alloc(to, _u, size(mortars.u)) + neighbor_ids = unsafe_wrap_or_alloc(to, _neighbor_ids, size(mortars.neighbor_ids)) + node_indices = unsafe_wrap_or_alloc(to, _node_indices, size(mortars.node_indices)) + + NDIMS = ndims(mortars) + new_type_params = (NDIMS, + eltype(_u), + NDIMS + 1, + NDIMS + 3, + typeof(u), typeof(neighbor_ids), typeof(node_indices), + typeof(_u), typeof(_neighbor_ids), typeof(_node_indices)) + return P4estMortarContainer{new_type_params...}(u, neighbor_ids, node_indices, + _u, _neighbor_ids, _node_indices) +end + function reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) # Re-initialize elements container @unpack elements = cache diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 676b37efff3..cb9cd1ffc95 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -5,15 +5,19 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2, + uArray <: DenseArray{uEltype, NDIMSP2}, + VecInt <: DenseVector{Int}, + IndicesVector <: + DenseVector{NTuple{NDIMS, Symbol}}, + uVector <: DenseVector{uEltype}} <: AbstractContainer - u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides::Vector{Int} # [interface] - + u::uArray # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::VecInt # [interface] + node_indices::IndicesVector # [interface] + local_sides::VecInt # [interface] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector end @inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) @@ -27,9 +31,10 @@ function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) n_dims = ndims(mpi_interfaces) n_nodes = size(mpi_interfaces.u, 3) n_variables = size(mpi_interfaces.u, 2) + ArrayType = storage_type(mpi_interfaces) resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + mpi_interfaces.u = unsafe_wrap(ArrayType, pointer(_u), (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) @@ -64,11 +69,13 @@ function init_mpi_interfaces(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, - local_neighbor_ids, - node_indices, - local_sides, - _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) init_mpi_interfaces!(mpi_interfaces, mesh) @@ -81,6 +88,32 @@ function 
init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) return mpi_interfaces end +function Adapt.parent_type(::Type{<:Trixi.P4estMPIInterfaceContainer{<:Any, <:Any, + <:Any, A}}) where {A} + return A +end + +# Manual adapt_structure since we have aliasing memory +function Adapt.adapt_structure(to, mpi_interfaces::P4estMPIInterfaceContainer) + # Adapt Vectors and underlying storage + _u = adapt(to, mpi_interfaces._u) + local_neighbor_ids = adapt(to, mpi_interfaces.local_neighbor_ids) + node_indices = adapt(to, mpi_interfaces.node_indices) + local_sides = adapt(to, mpi_interfaces.local_sides) + + # Wrap array again + u = unsafe_wrap_or_alloc(to, _u, size(mpi_interfaces.u)) + + NDIMS = ndims(mpi_interfaces) + return P4estMPIInterfaceContainer{NDIMS, eltype(u), + NDIMS + 2, + typeof(u), typeof(local_neighbor_ids), + typeof(node_indices), typeof(_u)}(u, + local_neighbor_ids, + node_indices, + local_sides, _u) +end + # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into @@ -88,14 +121,17 @@ end # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. 
mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, - NDIMSP2, NDIMSP3} <: AbstractContainer - u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] - local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] - node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + NDIMSP2, NDIMSP3, + uArray <: DenseArray{uEltype, NDIMSP3}, + uVector <: DenseVector{uEltype}} <: + AbstractContainer + u::uArray # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar][ids] + local_neighbor_positions::Vector{Vector{Int}} # [mortar][positions] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] # internal `resize!`able storage - _u::Vector{uEltype} + _u::uVector _node_indices::Vector{NTuple{NDIMS, Symbol}} _normal_directions::Vector{RealT} end @@ -164,11 +200,12 @@ function init_mpi_mortars(mesh::Union{ParallelP4estMesh, ParallelT8codeMesh}, eq 2^(NDIMS - 1), n_mpi_mortars)) mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, - NDIMS + 3}(u, local_neighbor_ids, - local_neighbor_positions, - node_indices, normal_directions, - _u, _node_indices, - _normal_directions) + NDIMS + 3, typeof(u), + typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) if n_mpi_mortars > 0 init_mpi_mortars!(mpi_mortars, mesh, basis, elements) @@ -184,6 +221,33 @@ function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements return mpi_mortars end +function Adapt.adapt_structure(to, mpi_mortars::P4estMPIMortarContainer) + # TODO: Vector of Vector type data structure does not work on GPUs, + # must be 
redesigned. This skeleton implementation here just exists + for compatibility with the rest of the KA.jl solver code + + _u = adapt(to, mpi_mortars._u) + _node_indices = mpi_mortars._node_indices + _normal_directions = mpi_mortars._normal_directions + + u = unsafe_wrap_or_alloc(to, _u, size(mpi_mortars.u)) + local_neighbor_ids = mpi_mortars.local_neighbor_ids + local_neighbor_positions = mpi_mortars.local_neighbor_positions + node_indices = mpi_mortars.node_indices + normal_directions = mpi_mortars.normal_directions + + NDIMS = ndims(mpi_mortars) + return P4estMPIMortarContainer{NDIMS, eltype(_u), + eltype(_normal_directions), + NDIMS + 1, NDIMS + 2, NDIMS + 3, + typeof(u), typeof(_u)}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, + normal_directions, _u, + _node_indices, + _normal_directions) +end + # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 2cc201dd1f0..7acddf07b4b 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -5,12 +5,13 @@ @muladd begin #! 
format: noindent -mutable struct P4estMPICache{uEltype} +mutable struct P4estMPICache{BufferType <: DenseVector, + VecInt <: DenseVector{<:Integer}} mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_neighbor_interfaces::VecOfArrays{VecInt} + mpi_neighbor_mortars::VecOfArrays{VecInt} + mpi_send_buffers::VecOfArrays{BufferType} + mpi_recv_buffers::VecOfArrays{BufferType} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} @@ -25,25 +26,29 @@ function P4estMPICache(uEltype) end mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) |> VecOfArrays + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) |> VecOfArrays mpi_send_requests = Vector{MPI.Request}(undef, 0) mpi_recv_requests = Vector{MPI.Request}(undef, 0) n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) n_elements_global = 0 first_element_global_id = 0 - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, - mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + P4estMPICache{Vector{uEltype}, Vector{Int}}(mpi_neighbor_ranks, + mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end 
-@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype +@inline Base.eltype(::P4estMPICache{BufferType}) where {BufferType} = eltype(BufferType) + +# @eval due to @muladd +@eval Adapt.@adapt_structure(P4estMPICache) ## # Note that the code in `start_mpi_send`/`finish_mpi_receive!` is sensitive to inference on (at least) Julia 1.10. @@ -265,16 +270,16 @@ end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, - mpi_mortars, - mesh) + mpi_neighbor_ranks, _mpi_neighbor_interfaces, _mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, - mpi_neighbor_mortars, - ndims(mesh), - nvars, - n_nodes, - uEltype) + _mpi_send_buffers, _mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(_mpi_neighbor_interfaces, + _mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) # Determine local and total number of elements n_elements_global = Int(mesh.p4est.global_num_quadrants[]) @@ -286,6 +291,11 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + mpi_neighbor_interfaces = VecOfArrays(_mpi_neighbor_interfaces) + mpi_neighbor_mortars = VecOfArrays(_mpi_neighbor_mortars) + mpi_send_buffers = VecOfArrays(_mpi_send_buffers) + mpi_recv_buffers = VecOfArrays(_mpi_recv_buffers) + # TODO reuse existing structures @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0cb3bd7f409..d6cf6e1ce6d 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -13,9 +13,10 @@ It stores a set of global indices for each boundary condition type and name to e during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions set by the user in the elixir file is also stored for printing. """ -mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}, + Vec <: AbstractVector{<:Integer}} boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_indices::NTuple{N, Vec} # integer vectors containing global boundary indices boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file boundary_symbol_indices::Dict{Symbol, Vector{Int}} # integer vectors containing global boundary indices per boundary identifier end @@ -33,10 +34,11 @@ function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) boundary_symbol_indices = Dict{Symbol, Vector{Int}}() container = UnstructuredSortedBoundaryTypes{n_boundary_types, - typeof(boundary_condition_types)}(boundary_condition_types, - boundary_indices, - boundary_conditions, - boundary_symbol_indices) + typeof(boundary_condition_types), + Vector{Int}}(boundary_condition_types, + boundary_indices, + boundary_conditions, + boundary_symbol_indices) initialize!(container, cache) end @@ -119,4 +121,7 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N return 
boundary_types_container end + +# @eval due to @muladd +@eval Adapt.@adapt_structure(UnstructuredSortedBoundaryTypes) end # @muladd diff --git a/test/Project.toml b/test/Project.toml index 8651b3d4d22..6c67d2a82df 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Convex = "f65535da-76fb-5f13-bab9-19810c17039a" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" diff --git a/test/runtests.jl b/test/runtests.jl index a9dfc4cb999..d08ff018837 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -116,4 +116,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" include("test_paper_self_gravitating_gas_dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" + import CUDA + if CUDA.functional() + include("test_cuda.jl") + else + @warn "Unable to run CUDA tests on this machine" + end + end end diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 9b3f2d67903..154088995ca 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -10,6 +10,7 @@ include("test_trixi.jl") @timed_testset "Aqua.jl" begin Aqua.test_all(Trixi, ambiguities = false, + unbound_args = false, # FIXME: UnstructuredSortedBoundaryTypes # exceptions necessary for adding a new method `StartUpDG.estimate_h` # in src/solvers/dgmulti/sbp.jl piracies = (treat_as_own = [Trixi.StartUpDG.RefElemData, diff --git a/test/test_cuda.jl b/test/test_cuda.jl new file mode 100644 index 00000000000..f2fd11233c6 --- /dev/null +++ b/test/test_cuda.jl @@ -0,0 +1,20 @@ +module TestCUDA + +using CUDA +using Test +using Trixi + 
+include("test_trixi.jl") + +# EXAMPLES_DIR = joinpath(examples_dir(), "dgmulti_1d") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +# TODO: + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index e14adc69283..1f5735ddc35 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -27,6 +27,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_advection_nonconforming_flag.jl" begin diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl index 259eb39c545..c3291c3ba9d 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -2,6 +2,7 @@ module TestExamplesUnstructuredMesh2D using Test using Trixi +using Adapt include("test_trixi.jl") @@ -32,6 +33,12 @@ isdir(outdir) && rm(outdir, recursive = true) du_ode = similar(u_ode) @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end + semi32 = Trixi.trixi_adapt(Array, Float32, semi) + @test real(semi32.solver) == Float32 + @test real(semi32.solver.basis) == Float32 + @test real(semi32.solver.mortar) == Float32 + # TODO: remake ignores the mesh as well + @test real(semi32.mesh) == Float64 end @trixi_testset "elixir_euler_free_stream.jl" begin