Skip to content

Commit

Permalink
Merge pull request #2235 from KhronosGroup/fix-2212
Browse files Browse the repository at this point in the history
MSL: Implement MSL 3.1 image atomics natively
  • Loading branch information
HansKristian-Work authored Nov 29, 2023
2 parents 50e90dd + f3573b9 commit a3da0e8
Show file tree
Hide file tree
Showing 30 changed files with 1,327 additions and 158 deletions.
75 changes: 75 additions & 0 deletions reference/opt/shaders-msl/comp/atomic-image.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"

#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>

using namespace metal;

// The required alignment of a linear texture of R32Uint format.
constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]];
constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4;
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)

struct SSBO
{
uint u32;
int i32;
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);

kernel void main0(device SSBO& ssbo [[buffer(2)]], texture2d<uint> uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d<int, access::write> iImage [[texture(1)]], device atomic_int* iImage_atomic [[buffer(1)]])
{
uint _19 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _27 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
iImage.write(int4(int(_27)), uint2(int2(1, 6)));
uint _32 = atomic_fetch_or_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _34 = atomic_fetch_xor_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _36 = atomic_fetch_and_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _38 = atomic_fetch_min_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _40 = atomic_fetch_max_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], 1u, memory_order_relaxed);
uint _44;
do
{
_44 = 10u;
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(1, 5), uImage)], &_44, 2u, memory_order_relaxed, memory_order_relaxed) && _44 == 10u);
int _47 = atomic_fetch_add_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _49 = atomic_fetch_or_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _51 = atomic_fetch_xor_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _53 = atomic_fetch_and_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _55 = atomic_fetch_min_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _57 = atomic_fetch_max_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 6), iImage)], 1, memory_order_relaxed);
int _61;
do
{
_61 = 10;
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&iImage_atomic[spvImage2DAtomicCoord(int2(1, 5), iImage)], &_61, 2, memory_order_relaxed, memory_order_relaxed) && _61 == 10);
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _82;
do
{
_82 = 10u;
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u);
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _99;
do
{
_99 = 10;
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10);
}

72 changes: 72 additions & 0 deletions reference/opt/shaders-msl/comp/atomic-image.msl31.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#pragma clang diagnostic ignored "-Wunused-variable"

#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>

using namespace metal;

struct SSBO
{
uint u32;
int i32;
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);

kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d<int, access::read_write> iImage [[texture(1)]])
{
uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
iImage.write(int4(int(_27)), uint2(int2(1, 6)));
uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x;
uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x;
uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x;
uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x;
uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x;
uint _44;
uint4 _102;
do
{
_102.x = 10u;
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u);
_44 = _102.x;
int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x;
int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x;
int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x;
int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x;
int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x;
int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x;
int _61;
int4 _104;
do
{
_104.x = 10;
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10);
_61 = _104.x;
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _82;
do
{
_82 = 10u;
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u);
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _99;
do
{
_99 = 10;
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10);
}

Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access
{
foo.x = 1.0;
}
uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : uint{});
if (int(gl_FragCoord.x) == 3)
{
gl_HelperInvocation = true, discard_fragment();
}
int2 _101 = int2(gl_FragCoord.xy);
(gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_101)));
uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _108 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed));
uint _110 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _119 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed));
uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : uint{});
uint _108 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 62u, memory_order_relaxed) : uint{});
uint _110 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : uint{});
uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : uint{});
uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : uint{});
uint _119 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 100u, memory_order_relaxed) : uint{});
uint _124;
if (!gl_HelperInvocation)
{
Expand All @@ -56,7 +56,7 @@ fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access
}
else
{
_124 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed);
_124 = {};
}
bool _125 = gl_HelperInvocation;
out.fragColor = float4(1.0, float(_125), 0.0, 1.0);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma clang diagnostic ignored "-Wunused-variable"

#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>

using namespace metal;

struct foo_t
{
float x;
uint y;
};

struct main0_out
{
float4 fragColor [[color(0)]];
};

fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access::read_write> bar [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
bool gl_HelperInvocation = {};
gl_HelperInvocation = simd_is_helper_thread();
if (!gl_HelperInvocation)
{
foo.x = 1.0;
}
uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : uint{});
if (int(gl_FragCoord.x) == 3)
{
gl_HelperInvocation = true, discard_fragment();
}
int2 _101 = int2(gl_FragCoord.xy);
(gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_101)));
uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : uint{});
uint _108 = (!gl_HelperInvocation ? bar.atomic_fetch_or(uint2(_101), 62u).x : uint{});
uint _110 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : uint{});
uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : uint{});
uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : uint{});
uint _119 = (!gl_HelperInvocation ? bar.atomic_fetch_max(uint2(_101), 100u).x : uint{});
uint _124;
uint4 _135;
if (!gl_HelperInvocation)
{
do
{
_135.x = 100u;
} while (!bar.atomic_compare_exchange_weak(uint2(_101), &_135, 42u) && _135.x == 100u);
_124 = _135.x;
}
else
{
_124 = {};
}
bool _125 = gl_HelperInvocation;
out.fragColor = float4(1.0, float(_125), 0.0, 1.0);
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct foo
{
int x;
};

struct main0_out
{
float4 fragColor [[color(0)]];
};

fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
bool gl_HelperInvocation = {};
gl_HelperInvocation = simd_is_helper_thread();
if (gl_FragCoord.y == 7.0)
{
gl_HelperInvocation = true, discard_fragment();
}
if (!gl_HelperInvocation)
{
_24.x = 0;
}
for (; float(_24.x) < gl_FragCoord.x; )
{
if (!gl_HelperInvocation)
{
_24.x++;
}
continue;
}
out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0);
return out;
}

Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access
{
foo.x = 1.0;
}
uint _90 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _90 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : uint{});
if (int(gl_FragCoord.x) == 3)
{
gl_HelperInvocation = true, discard_fragment();
}
int2 _100 = int2(gl_FragCoord.xy);
(gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_100)));
uint _102 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _107 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed));
uint _109 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _111 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _113 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed));
uint _118 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed));
uint _102 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : uint{});
uint _107 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 62u, memory_order_relaxed) : uint{});
uint _109 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : uint{});
uint _111 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : uint{});
uint _113 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : uint{});
uint _118 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 100u, memory_order_relaxed) : uint{});
uint _123;
if (!gl_HelperInvocation)
{
Expand All @@ -56,7 +56,7 @@ fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d<uint, access
}
else
{
_123 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed);
_123 = {};
}
out.fragColor = float4(1.0, 0.0, 0.0, 1.0);
return out;
Expand Down
Loading

0 comments on commit a3da0e8

Please sign in to comment.