Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.

diff --git a/src/array.jl b/src/array.jl
index d66396bfc..d843209ad 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -50,19 +50,19 @@ end
# As well as "mutable singleton" types like `Symbol` that use pointer-identity
function valid_type(@nospecialize(T))
- if Base.allocatedinline(T)
- if hasfieldcount(T)
- return all(valid_type, fieldtypes(T))
+ if Base.allocatedinline(T)
+ if hasfieldcount(T)
+ return all(valid_type, fieldtypes(T))
+ end
+ return true
+ elseif Base.ismutabletype(T)
+ return Base.datatype_fieldcount(T) == 0
end
- return true
- elseif Base.ismutabletype(T)
- return Base.datatype_fieldcount(T) == 0
- end
- return false
+ return false
end
-@inline function check_eltype(name, T)
- if !valid_type(T)
+@inline function check_eltype(name, T)
+ return if !valid_type(T)
explanation = explain_eltype(T)
error("""
$name only supports element types that are allocated inline.
@@ -247,7 +247,7 @@ end
function Base.unsafe_wrap(::Type{CuArray{T,N,M}},
ptr::CuPtr{T}, dims::NTuple{N,Int};
own::Bool=false, ctx::CuContext=context()) where {T,N,M}
- check_eltype("unsafe_wrap(CuArray, ...)", T)
+ check_eltype("unsafe_wrap(CuArray, ...)", T)
sz = prod(dims) * aligned_sizeof(T)
# create a memory object
diff --git a/test/base/array.jl b/test/base/array.jl
index 42f4668af..83335046b 100644
--- a/test/base/array.jl
+++ b/test/base/array.jl
@@ -176,14 +176,14 @@ end
# symbols and tuples thereof
let a = CuArray([:a])
- b = unsafe_wrap(CuArray, pointer(a), 1)
- @test typeof(b) <: CuArray{Symbol,1}
- @test size(b) == (1,)
+ b = unsafe_wrap(CuArray, pointer(a), 1)
+ @test typeof(b) <: CuArray{Symbol, 1}
+ @test size(b) == (1,)
end
- let a = CuArray([(:a,:b)])
- b = unsafe_wrap(CuArray, pointer(a), 1)
- @test typeof(b) <: CuArray{Tuple{Symbol,Symbol},1}
- @test size(b) == (1,)
+ let a = CuArray([(:a, :b)])
+ b = unsafe_wrap(CuArray, pointer(a), 1)
+ @test typeof(b) <: CuArray{Tuple{Symbol, Symbol}, 1}
+ @test size(b) == (1,)
end
end
|
|
The CI failures are related. |
|
Fun! This fails: […] but this doesn't: […]. The latter is the one I tested locally. |
|
Having |
|
Yeah, that's why I reverted the commit that added it to memory.jl. The crux is that we need a version of […]. I am doubly amused that […] of course works... I am half tempted to shadow `sizeof` inside CUDA.jl with an arguably correct implementation. |
df49b7c to
f35b436
Compare
36e6120 to
55fde61
Compare
Codecov Report: All modified and coverable lines are covered by tests ✅
Additional details and impacted files

@@ Coverage Diff @@
## master #2753 +/- ##
===========================================
+ Coverage 77.34% 89.61% +12.27%
===========================================
Files 153 153
Lines 13108 13202 +94
===========================================
+ Hits 10138 11831 +1693
+ Misses 2970 1371 -1599

☔ View full report in Codecov by Sentry.

🚀 New features to boost your workflow:
|
55fde61 to
dc105d3
Compare
There was a problem hiding this comment.
CUDA.jl Benchmarks
Details
| Benchmark suite | Current: dc105d3 | Previous: 7a83380 | Ratio |
|---|---|---|---|
latency/precompile |
42852388609.5 ns |
42926176632.5 ns |
1.00 |
latency/ttfp |
7124007951 ns |
7106699468 ns |
1.00 |
latency/import |
3376439835 ns |
3389948934 ns |
1.00 |
integration/volumerhs |
9622831.5 ns |
9608052 ns |
1.00 |
integration/byval/slices=1 |
147617 ns |
147162 ns |
1.00 |
integration/byval/slices=3 |
426742 ns |
425761 ns |
1.00 |
integration/byval/reference |
145513 ns |
145313 ns |
1.00 |
integration/byval/slices=2 |
287399 ns |
286425 ns |
1.00 |
integration/cudadevrt |
104082 ns |
103604 ns |
1.00 |
kernel/indexing |
14845 ns |
14311 ns |
1.04 |
kernel/indexing_checked |
15595 ns |
15150.5 ns |
1.03 |
kernel/occupancy |
712.2765957446809 ns |
707.8943661971831 ns |
1.01 |
kernel/launch |
2430.4444444444443 ns |
2395.4444444444443 ns |
1.01 |
kernel/rand |
16234 ns |
18678 ns |
0.87 |
array/reverse/1d |
20378 ns |
19617 ns |
1.04 |
array/reverse/2d |
25377.5 ns |
24183.5 ns |
1.05 |
array/reverse/1d_inplace |
11470 ns |
10894 ns |
1.05 |
array/reverse/2d_inplace |
13470 ns |
12860 ns |
1.05 |
array/copy |
21313 ns |
20975 ns |
1.02 |
array/iteration/findall/int |
159797.5 ns |
157315 ns |
1.02 |
array/iteration/findall/bool |
140863 ns |
138316 ns |
1.02 |
array/iteration/findfirst/int |
163877 ns |
154273.5 ns |
1.06 |
array/iteration/findfirst/bool |
165584 ns |
154976.5 ns |
1.07 |
array/iteration/scalar |
73623 ns |
73010 ns |
1.01 |
array/iteration/logical |
220743.5 ns |
215446 ns |
1.02 |
array/iteration/findmin/1d |
48764 ns |
41421 ns |
1.18 |
array/iteration/findmin/2d |
99894 ns |
94396 ns |
1.06 |
array/reductions/reduce/1d |
47927 ns |
43605.5 ns |
1.10 |
array/reductions/reduce/2d |
52530 ns |
45135 ns |
1.16 |
array/reductions/mapreduce/1d |
46890 ns |
40394 ns |
1.16 |
array/reductions/mapreduce/2d |
43615.5 ns |
51842 ns |
0.84 |
array/broadcast |
21622 ns |
21077 ns |
1.03 |
array/copyto!/gpu_to_gpu |
11405 ns |
11034 ns |
1.03 |
array/copyto!/cpu_to_gpu |
218981 ns |
216527 ns |
1.01 |
array/copyto!/gpu_to_cpu |
285643 ns |
283217 ns |
1.01 |
array/accumulate/1d |
110396 ns |
108949 ns |
1.01 |
array/accumulate/2d |
81232 ns |
80328 ns |
1.01 |
array/construct |
1264.2 ns |
1243.7 ns |
1.02 |
array/random/randn/Float32 |
44970.5 ns |
47595 ns |
0.94 |
array/random/randn!/Float32 |
25502 ns |
25281 ns |
1.01 |
array/random/rand!/Int64 |
27522 ns |
27337 ns |
1.01 |
array/random/rand!/Float32 |
8964.333333333334 ns |
8944.333333333334 ns |
1.00 |
array/random/rand/Int64 |
30484 ns |
33758 ns |
0.90 |
array/random/rand/Float32 |
13412 ns |
13235 ns |
1.01 |
array/permutedims/4d |
61881 ns |
61033.5 ns |
1.01 |
array/permutedims/2d |
55855 ns |
54927 ns |
1.02 |
array/permutedims/3d |
56883 ns |
55899 ns |
1.02 |
array/sorting/1d |
2778520 ns |
2777029.5 ns |
1.00 |
array/sorting/by |
3369694 ns |
3367967 ns |
1.00 |
array/sorting/2d |
1087235 ns |
1085746 ns |
1.00 |
cuda/synchronization/stream/auto |
1042.8 ns |
1030 ns |
1.01 |
cuda/synchronization/stream/nonblocking |
7186 ns |
8027.8 ns |
0.90 |
cuda/synchronization/stream/blocking |
810.5052631578948 ns |
803.8265306122449 ns |
1.01 |
cuda/synchronization/context/auto |
1185.4 ns |
1173.1 ns |
1.01 |
cuda/synchronization/context/nonblocking |
7433 ns |
7706.2 ns |
0.96 |
cuda/synchronization/context/blocking |
906.625 ns |
906.5853658536586 ns |
1.00 |
This comment was automatically generated by a workflow using github-action-benchmark.
dc105d3 to
dac3df0
Compare
Spiritual follow-up on #2624
Noticed while working on trixi-framework/Trixi.jl#2212