Skip to content

Commit 9da5f7c

Browse files
Merge pull request #2242 from KhronosGroup/fix-2221
MSL: Improve PtrAccessChain handling.
2 parents 766a74d + ffab994 commit 9da5f7c

8 files changed

+549
-17
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
7+
8+
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
9+
{
10+
threadgroup float2 test[64];
11+
float _21 = float(gl_GlobalInvocationID.x);
12+
float2 _22 = float2(_21);
13+
((&((&test)[0u]))[0u])[1u + 2u] = _22;
14+
((&test)[0u])[1u + 2u] = _22;
15+
((&test)[0u])[3u] = _22;
16+
((threadgroup float*)&((&test)[0u])[2u])[0u + 1u] = _21;
17+
}
18+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
7+
8+
kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
9+
{
10+
threadgroup float2 test[64];
11+
float _21 = float(gl_GlobalInvocationID.x);
12+
((threadgroup float*)&(*(true ? &test[1u] : &test[2u])))[1u] = _21;
13+
}
14+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#include <metal_stdlib>
2+
#include <simd/simd.h>
3+
4+
using namespace metal;
5+
6+
constant uint _15_tmp [[function_constant(0)]];
7+
constant uint _15 = is_function_constant_defined(_15_tmp) ? _15_tmp : 1u;
8+
constant uint _16_tmp [[function_constant(1)]];
9+
constant uint _16 = is_function_constant_defined(_16_tmp) ? _16_tmp : 1u;
10+
constant uint _17_tmp [[function_constant(2)]];
11+
constant uint _17 = is_function_constant_defined(_17_tmp) ? _17_tmp : 1u;
12+
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_15, _16, _17);
13+
14+
struct _6
15+
{
16+
uint4 _m0[1];
17+
};
18+
19+
struct _7
20+
{
21+
uint _m0;
22+
};
23+
24+
struct _8
25+
{
26+
_7 _m0;
27+
};
28+
29+
constant uchar4 _137 = {};
30+
31+
kernel void main0(device _6& _25 [[buffer(0)]], constant _8& _29 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
32+
{
33+
threadgroup uint _5[256];
34+
threadgroup uchar _10[1024];
35+
uint3 _20 = gl_WorkGroupSize;
36+
bool _40 = _29._m0._m0 != 0u;
37+
if (_40)
38+
{
39+
uchar _58 = uchar(((gl_LocalInvocationID.y * gl_LocalInvocationID.x) / gl_WorkGroupID.y) % 255u);
40+
uint _66;
41+
uint _61 = 0u;
42+
uint _62;
43+
for (;;)
44+
{
45+
_62 = _61 * _29._m0._m0;
46+
_66 = 0u;
47+
for (;;)
48+
{
49+
uint _67 = _66 + _62;
50+
uint _68 = _66 * _61;
51+
_5[_67] = gl_WorkGroupID.x + _68;
52+
uint _74 = _67 << 2u;
53+
uint _76 = _74 >> 10u;
54+
uint _78 = _74 & 1020u;
55+
uchar4 _80 = as_type<uchar4>(gl_WorkGroupID.y + _68);
56+
((&_10)[_76])[_78 | 1u] = _80.y;
57+
((&_10)[_76])[_78 | 2u] = _80.z;
58+
((&_10)[_76])[_78 | 3u] = _80.w;
59+
((&_10)[_76])[_78] = _58;
60+
uint _93 = _66 + 1u;
61+
if (_93 >= _29._m0._m0)
62+
{
63+
break;
64+
}
65+
else
66+
{
67+
_66 = _93;
68+
}
69+
}
70+
uint _100 = _61 + 1u;
71+
if (_100 >= _29._m0._m0)
72+
{
73+
break;
74+
}
75+
else
76+
{
77+
_61 = _100;
78+
continue;
79+
}
80+
}
81+
}
82+
threadgroup_barrier(mem_flags::mem_threadgroup);
83+
uint _112;
84+
if (_40)
85+
{
86+
_112 = 0u;
87+
uint _117;
88+
uint _113;
89+
for (;;)
90+
{
91+
_113 = _112 * _29._m0._m0;
92+
_117 = 0u;
93+
for (;;)
94+
{
95+
uint _118 = _117 + _113;
96+
uint _123 = _118 << 2u;
97+
uint _124 = _123 >> 10u;
98+
uint _125 = _123 & 1020u;
99+
uchar4 _138;
100+
_138.x = ((&_10)[_124])[_125];
101+
_138.y = ((&_10)[_124])[_125 | 1u];
102+
_138.z = ((&_10)[_124])[_125 | 2u];
103+
_138.w = ((&_10)[_124])[_125 | 3u];
104+
uint _143 = _5[_118] + as_type<uint>(_138);
105+
uint4 _144 = _25._m0[_118];
106+
_144.x = _143;
107+
_144.y = _143 >> 2u;
108+
_144.w = _143 >> 3u;
109+
_25._m0[_118] = _144;
110+
uint _150 = _117 + 1u;
111+
if (_150 >= _29._m0._m0)
112+
{
113+
break;
114+
}
115+
else
116+
{
117+
_117 = _150;
118+
}
119+
}
120+
uint _157 = _112 + 1u;
121+
if (_157 >= _29._m0._m0)
122+
{
123+
break;
124+
}
125+
else
126+
{
127+
_112 = _157;
128+
continue;
129+
}
130+
}
131+
}
132+
threadgroup_barrier(mem_flags::mem_threadgroup);
133+
}
134+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; SPIR-V
2+
; Version: 1.0
3+
; Generator: Khronos Glslang Reference Front End; 11
4+
; Bound: 26
5+
; Schema: 0
6+
OpCapability Shader
7+
OpCapability VariablePointers
8+
OpExtension "SPV_KHR_variable_pointers"
9+
%1 = OpExtInstImport "GLSL.std.450"
10+
OpMemoryModel Logical GLSL450
11+
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
12+
OpExecutionMode %main LocalSize 64 1 1
13+
OpSource GLSL 450
14+
OpName %main "main"
15+
OpName %test "test"
16+
OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
17+
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
18+
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
19+
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
20+
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
21+
%void = OpTypeVoid
22+
%3 = OpTypeFunction %void
23+
%float = OpTypeFloat 32
24+
%v2float = OpTypeVector %float 2
25+
%uint = OpTypeInt 32 0
26+
%uint_64 = OpConstant %uint 64
27+
%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64
28+
%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64
29+
%test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup
30+
%_ptr_Input_uint = OpTypePointer Input %uint
31+
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
32+
%v3uint = OpTypeVector %uint 3
33+
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
34+
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
35+
%uint_0 = OpConstant %uint 0
36+
%uint_1 = OpConstant %uint 1
37+
%uint_2 = OpConstant %uint 2
38+
%uint_3 = OpConstant %uint 3
39+
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
40+
%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
41+
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1
42+
%main = OpFunction %void None %3
43+
%5 = OpLabel
44+
%14 = OpLoad %uint %gl_LocalInvocationIndex
45+
%19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
46+
%20 = OpLoad %uint %19
47+
%21 = OpConvertUToF %float %20
48+
%22 = OpCompositeConstruct %v2float %21 %21
49+
50+
; Dummy expression. *(&test + 0)
51+
%ptr0 = OpPtrAccessChain %_ptr_Workgroup__arr_v2float_uint_64 %test %uint_0
52+
%ptr1 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr0 %uint_0 %uint_1
53+
%ptr2 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr1 %uint_2
54+
OpStore %ptr2 %22
55+
56+
; Chain PtrAccessChain while keeping pointer type.
57+
%ptr3 = OpPtrAccessChain %_ptr_Workgroup_v2float %test %uint_0 %uint_1
58+
%ptr4 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr3 %uint_2
59+
OpStore %ptr4 %22
60+
61+
; Same semantics.
62+
%ptr5 = OpPtrAccessChain %_ptr_Workgroup_v2float %test %uint_0 %uint_3
63+
OpStore %ptr5 %22
64+
65+
; Scalar shenanigans.
66+
%ptr6 = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0
67+
%ptr7 = OpPtrAccessChain %_ptr_Workgroup_float %ptr6 %uint_1
68+
OpStore %ptr7 %21
69+
70+
OpReturn
71+
OpFunctionEnd
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; SPIR-V
2+
; Version: 1.0
3+
; Generator: Khronos Glslang Reference Front End; 11
4+
; Bound: 26
5+
; Schema: 0
6+
OpCapability Shader
7+
OpCapability VariablePointers
8+
OpExtension "SPV_KHR_variable_pointers"
9+
%1 = OpExtInstImport "GLSL.std.450"
10+
OpMemoryModel Logical GLSL450
11+
OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID
12+
OpExecutionMode %main LocalSize 64 1 1
13+
OpSource GLSL 450
14+
OpName %main "main"
15+
OpName %test "test"
16+
OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex"
17+
OpName %gl_GlobalInvocationID "gl_GlobalInvocationID"
18+
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
19+
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
20+
OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
21+
%void = OpTypeVoid
22+
%3 = OpTypeFunction %void
23+
%float = OpTypeFloat 32
24+
%bool = OpTypeBool
25+
%true = OpConstantTrue %bool
26+
%v2float = OpTypeVector %float 2
27+
%uint = OpTypeInt 32 0
28+
%uint_64 = OpConstant %uint 64
29+
%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64
30+
%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64
31+
%test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup
32+
%_ptr_Input_uint = OpTypePointer Input %uint
33+
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
34+
%v3uint = OpTypeVector %uint 3
35+
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
36+
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
37+
%uint_0 = OpConstant %uint 0
38+
%uint_1 = OpConstant %uint 1
39+
%uint_2 = OpConstant %uint 2
40+
%uint_3 = OpConstant %uint 3
41+
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
42+
%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
43+
%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1
44+
%main = OpFunction %void None %3
45+
%5 = OpLabel
46+
%14 = OpLoad %uint %gl_LocalInvocationIndex
47+
%19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0
48+
%20 = OpLoad %uint %19
49+
%21 = OpConvertUToF %float %20
50+
%22 = OpCompositeConstruct %v2float %21 %21
51+
52+
%a = OpAccessChain %_ptr_Workgroup_v2float %test %uint_1
53+
%b = OpAccessChain %_ptr_Workgroup_v2float %test %uint_2
54+
%c = OpSelect %_ptr_Workgroup_v2float %true %a %b
55+
56+
%d = OpAccessChain %_ptr_Workgroup_float %c %uint_1
57+
OpStore %d %21
58+
59+
OpReturn
60+
OpFunctionEnd

0 commit comments

Comments
 (0)