-
Notifications
You must be signed in to change notification settings - Fork 572
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2235 from KhronosGroup/fix-2212
MSL: Implement MSL 3.1 image atomics natively
- Loading branch information
Showing
30 changed files
with
1,327 additions
and
158 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#pragma clang diagnostic ignored "-Wmissing-prototypes" | ||
#pragma clang diagnostic ignored "-Wunused-variable" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
#include <metal_atomic> | ||
|
||
using namespace metal; | ||
|
||
// The required alignment of a linear texture of R32Uint format. | ||
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; | ||
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; | ||
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics | ||
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) | ||
|
||
struct SSBO | ||
{ | ||
uint u32; | ||
int i32; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); | ||
|
||
kernel void main0(device SSBO& ssbo [[buffer(2)]], texture2d<uint> uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d<int, access::write> iImage [[texture(1)]], device atomic_int* iImage_atomic [[buffer(1)]]) | ||
{ | ||
uint _19 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _27 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
iImage.write(int4(int(_27)), uint2(int2(1, 6))); | ||
uint _32 = atomic_fetch_or_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _34 = atomic_fetch_xor_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _36 = atomic_fetch_and_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _38 = atomic_fetch_min_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _40 = atomic_fetch_max_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed); | ||
uint _44; | ||
do | ||
{ | ||
_44 = 10u; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], &_44, 2u, memory_order_relaxed, memory_order_relaxed) && _44 == 10u); | ||
int _47 = atomic_fetch_add_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _49 = atomic_fetch_or_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _51 = atomic_fetch_xor_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _53 = atomic_fetch_and_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _55 = atomic_fetch_min_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _57 = atomic_fetch_max_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed); | ||
int _61; | ||
do | ||
{ | ||
_61 = 10; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 5), iImage)], &_61, 2, memory_order_relaxed, memory_order_relaxed) && _61 == 10); | ||
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _82; | ||
do | ||
{ | ||
_82 = 10u; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u); | ||
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _99; | ||
do | ||
{ | ||
_99 = 10; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#pragma clang diagnostic ignored "-Wunused-variable" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
#include <metal_atomic> | ||
|
||
using namespace metal; | ||
|
||
struct SSBO | ||
{ | ||
uint u32; | ||
int i32; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); | ||
|
||
kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d<int, access::read_write> iImage [[texture(1)]]) | ||
{ | ||
uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; | ||
uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x; | ||
iImage.write(int4(int(_27)), uint2(int2(1, 6))); | ||
uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x; | ||
uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x; | ||
uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x; | ||
uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x; | ||
uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x; | ||
uint _44; | ||
uint4 _102; | ||
do | ||
{ | ||
_102.x = 10u; | ||
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u); | ||
_44 = _102.x; | ||
int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x; | ||
int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x; | ||
int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x; | ||
int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x; | ||
int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x; | ||
int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x; | ||
int _61; | ||
int4 _104; | ||
do | ||
{ | ||
_104.x = 10; | ||
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10); | ||
_61 = _104.x; | ||
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); | ||
uint _82; | ||
do | ||
{ | ||
_82 = 10u; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u); | ||
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); | ||
int _99; | ||
do | ||
{ | ||
_99 = 10; | ||
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl31.frag
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#pragma clang diagnostic ignored "-Wunused-variable" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
#include <metal_atomic> | ||
|
||
using namespace metal; | ||
|
||
struct foo_t | ||
{ | ||
float x; | ||
uint y; | ||
}; | ||
|
||
struct main0_out | ||
{ | ||
float4 fragColor [[color(0)]]; | ||
}; | ||
|
||
fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access::read_write> bar [[texture(0)]], float4 gl_FragCoord [[position]]) | ||
{ | ||
main0_out out = {}; | ||
bool gl_HelperInvocation = {}; | ||
gl_HelperInvocation = simd_is_helper_thread(); | ||
if (!gl_HelperInvocation) | ||
{ | ||
foo.x = 1.0; | ||
} | ||
uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : uint{}); | ||
if (int(gl_FragCoord.x) == 3) | ||
{ | ||
gl_HelperInvocation = true, discard_fragment(); | ||
} | ||
int2 _101 = int2(gl_FragCoord.xy); | ||
(gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_101))); | ||
uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : uint{}); | ||
uint _108 = (!gl_HelperInvocation ? bar.atomic_fetch_or(uint2(_101), 62u).x : uint{}); | ||
uint _110 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : uint{}); | ||
uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : uint{}); | ||
uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : uint{}); | ||
uint _119 = (!gl_HelperInvocation ? bar.atomic_fetch_max(uint2(_101), 100u).x : uint{}); | ||
uint _124; | ||
uint4 _135; | ||
if (!gl_HelperInvocation) | ||
{ | ||
do | ||
{ | ||
_135.x = 100u; | ||
} while (!bar.atomic_compare_exchange_weak(uint2(_101), &_135, 42u) && _135.x == 100u); | ||
_124 = _135.x; | ||
} | ||
else | ||
{ | ||
_124 = {}; | ||
} | ||
bool _125 = gl_HelperInvocation; | ||
out.fragColor = float4(1.0, float(_125), 0.0, 1.0); | ||
return out; | ||
} | ||
|
40 changes: 40 additions & 0 deletions
40
reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl31.frag
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
|
||
using namespace metal; | ||
|
||
struct foo | ||
{ | ||
int x; | ||
}; | ||
|
||
struct main0_out | ||
{ | ||
float4 fragColor [[color(0)]]; | ||
}; | ||
|
||
fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]]) | ||
{ | ||
main0_out out = {}; | ||
bool gl_HelperInvocation = {}; | ||
gl_HelperInvocation = simd_is_helper_thread(); | ||
if (gl_FragCoord.y == 7.0) | ||
{ | ||
gl_HelperInvocation = true, discard_fragment(); | ||
} | ||
if (!gl_HelperInvocation) | ||
{ | ||
_24.x = 0; | ||
} | ||
for (; float(_24.x) < gl_FragCoord.x; ) | ||
{ | ||
if (!gl_HelperInvocation) | ||
{ | ||
_24.x++; | ||
} | ||
continue; | ||
} | ||
out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0); | ||
return out; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.