Skip to content

Commit 4c3aaa6

Browse files
committed
Ensure that we look up the correct instruction for embedded masking/broadcast scenarios
1 parent 98b587c commit 4c3aaa6

File tree

9 files changed

+374
-185
lines changed

9 files changed

+374
-185
lines changed

src/coreclr/jit/codegen.h

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -921,6 +921,8 @@ class CodeGen final : public CodeGenInterface
921921
#ifdef FEATURE_HW_INTRINSICS
922922
void genHWIntrinsic(GenTreeHWIntrinsic* node);
923923
#if defined(TARGET_XARCH)
924+
instruction lookupIns(NamedIntrinsic id, var_types type, insOpts instOptions);
925+
924926
void genHWIntrinsic_R_RM(
925927
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp, insOpts instOptions);
926928
void genHWIntrinsic_R_RM_I(
@@ -929,13 +931,7 @@ class CodeGen final : public CodeGenInterface
929931
void genHWIntrinsic_R_R_RM_I(
930932
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions);
931933
void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions);
932-
void genHWIntrinsic_R_R_R_RM(instruction ins,
933-
emitAttr attr,
934-
regNumber targetReg,
935-
regNumber op1Reg,
936-
regNumber op2Reg,
937-
GenTree* op3,
938-
insOpts instOptions);
934+
void genHWIntrinsic_R_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions);
939935
void genHWIntrinsic_R_R_R_RM_I(
940936
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions);
941937

src/coreclr/jit/codegenxarch.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5776,7 +5776,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57765776
case NI_AVX2_ConvertToUInt32:
57775777
{
57785778
// These intrinsics are "ins reg/mem, xmm"
5779-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
5779+
ins = lookupIns(intrinsicId, baseType, INS_OPTS_NONE);
57805780
attr = emitActualTypeSize(baseType);
57815781
#if defined(TARGET_X86)
57825782
if (varTypeIsLong(baseType))
@@ -5803,7 +5803,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
58035803
case NI_AVX512_ExtractVector256:
58045804
{
58055805
// These intrinsics are "ins reg/mem, xmm, imm8"
5806-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
5806+
ins = lookupIns(intrinsicId, baseType, INS_OPTS_NONE);
58075807
attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize()));
58085808

58095809
if (intrinsicId == NI_X86Base_Extract)
@@ -5855,7 +5855,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
58555855
case NI_AVX512_ConvertToVector256UInt32WithSaturation:
58565856
{
58575857
// These intrinsics are "ins reg/mem, xmm"
5858-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
5858+
ins = lookupIns(intrinsicId, baseType, INS_OPTS_NONE);
58595859
attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize()));
58605860
break;
58615861
}

src/coreclr/jit/gentree.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19920,7 +19920,7 @@ bool GenTree::SupportsSettingZeroFlag()
1992019920
}
1992119921

1992219922
#ifdef FEATURE_HW_INTRINSICS
19923-
if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic(), nullptr)))
19923+
if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic())))
1992419924
{
1992519925
return true;
1992619926
}
@@ -20539,7 +20539,7 @@ bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
2053920539
{
2054020540
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
2054120541
var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType();
20542-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr);
20542+
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, comp);
2054320543

2054420544
if (comp->codeGen->instIsEmbeddedBroadcastCompatible(ins))
2054520545
{
@@ -20784,7 +20784,10 @@ bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize,
2078420784

2078520785
if (tgtSimdBaseJitType != CORINFO_TYPE_UNDEF)
2078620786
{
20787-
ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20787+
instruction tgtIns = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20788+
assert(ins != tgtIns);
20789+
20790+
ins = tgtIns;
2078820791
maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
2078920792
}
2079020793

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 137 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -87,108 +87,181 @@ instruction HWIntrinsicInfo::lookupIns(NamedIntrinsic id, var_types type, Compil
8787
#endif // TARGET_X86
8888

8989
#if defined(TARGET_XARCH)
90-
instruction evexIns = ins;
91-
92-
switch (ins)
90+
if (comp != nullptr)
9391
{
94-
case INS_movdqa32:
92+
instruction evexIns = ins;
93+
94+
switch (ins)
9595
{
96-
if (varTypeIsLong(type))
96+
case INS_movdqa32:
9797
{
98-
evexIns = INS_vmovdqa64;
98+
if (varTypeIsLong(type))
99+
{
100+
evexIns = INS_vmovdqa64;
101+
}
102+
break;
99103
}
100-
break;
101-
}
102104

103-
case INS_movdqu32:
104-
{
105-
if (varTypeIsLong(type))
105+
case INS_movdqu32:
106106
{
107-
evexIns = INS_vmovdqu64;
107+
if (varTypeIsLong(type))
108+
{
109+
evexIns = INS_vmovdqu64;
110+
}
111+
break;
108112
}
109-
break;
110-
}
111113

112-
case INS_vbroadcastf32x4:
113-
{
114-
if (type == TYP_DOUBLE)
114+
case INS_pandd:
115115
{
116-
evexIns = INS_vbroadcastf64x2;
116+
if (varTypeIsLong(type))
117+
{
118+
evexIns = INS_vpandq;
119+
}
120+
break;
117121
}
118-
break;
119-
}
120122

121-
case INS_vbroadcasti32x4:
122-
{
123-
if (varTypeIsLong(type))
123+
case INS_pandnd:
124124
{
125-
evexIns = INS_vbroadcasti64x2;
125+
if (varTypeIsLong(type))
126+
{
127+
evexIns = INS_vpandnq;
128+
}
129+
break;
126130
}
127-
break;
128-
}
129131

130-
case INS_vextractf32x4:
131-
{
132-
if (type == TYP_DOUBLE)
132+
case INS_pord:
133133
{
134-
evexIns = INS_vextractf64x2;
134+
if (varTypeIsLong(type))
135+
{
136+
evexIns = INS_vporq;
137+
}
138+
break;
135139
}
136-
else if (varTypeIsInt(type))
140+
141+
case INS_pxord:
137142
{
138-
evexIns = INS_vextracti32x4;
143+
if (varTypeIsLong(type))
144+
{
145+
evexIns = INS_vpxorq;
146+
}
147+
break;
139148
}
140-
else if (varTypeIsLong(type))
149+
150+
case INS_vbroadcastf32x4:
141151
{
142-
evexIns = INS_vextracti64x2;
152+
if (type == TYP_DOUBLE)
153+
{
154+
evexIns = INS_vbroadcastf64x2;
155+
}
156+
break;
143157
}
144-
break;
145-
}
146158

147-
case INS_vextracti32x4:
148-
{
149-
if (varTypeIsLong(type))
159+
case INS_vbroadcasti32x4:
150160
{
151-
evexIns = INS_vextracti64x2;
161+
if (varTypeIsLong(type))
162+
{
163+
evexIns = INS_vbroadcasti64x2;
164+
}
165+
break;
152166
}
153-
break;
154-
}
155167

156-
case INS_vinsertf32x4:
157-
{
158-
if (type == TYP_DOUBLE)
168+
case INS_vextractf32x4:
169+
{
170+
if (type == TYP_DOUBLE)
171+
{
172+
evexIns = INS_vextractf64x2;
173+
}
174+
else if (varTypeIsInt(type))
175+
{
176+
evexIns = INS_vextracti32x4;
177+
}
178+
else if (varTypeIsLong(type))
179+
{
180+
evexIns = INS_vextracti64x2;
181+
}
182+
break;
183+
}
184+
185+
case INS_vextracti32x4:
159186
{
160-
evexIns = INS_vinsertf64x2;
187+
if (varTypeIsLong(type))
188+
{
189+
evexIns = INS_vextracti64x2;
190+
}
191+
break;
161192
}
162-
else if (varTypeIsInt(type))
193+
194+
case INS_vperm2f128:
163195
{
164-
evexIns = INS_vinserti32x4;
196+
if (type == TYP_DOUBLE)
197+
{
198+
evexIns = INS_vshuff64x2;
199+
}
200+
else if (varTypeIsInt(type))
201+
{
202+
evexIns = INS_vshufi32x4;
203+
}
204+
else if (varTypeIsLong(type))
205+
{
206+
evexIns = INS_vshufi64x2;
207+
}
208+
else
209+
{
210+
evexIns = INS_vshuff32x4;
211+
}
212+
break;
165213
}
166-
else if (varTypeIsLong(type))
214+
215+
case INS_vperm2i128:
167216
{
168-
evexIns = INS_vinserti64x2;
217+
if (varTypeIsLong(type))
218+
{
219+
evexIns = INS_vshufi64x2;
220+
}
221+
else
222+
{
223+
evexIns = INS_vshufi32x4;
224+
}
225+
break;
169226
}
170-
break;
171-
}
172227

173-
case INS_vinserti32x4:
174-
{
175-
if (varTypeIsLong(type))
228+
case INS_vinsertf32x4:
176229
{
177-
evexIns = INS_vinserti64x2;
230+
if (type == TYP_DOUBLE)
231+
{
232+
evexIns = INS_vinsertf64x2;
233+
}
234+
else if (varTypeIsInt(type))
235+
{
236+
evexIns = INS_vinserti32x4;
237+
}
238+
else if (varTypeIsLong(type))
239+
{
240+
evexIns = INS_vinserti64x2;
241+
}
242+
break;
243+
}
244+
245+
case INS_vinserti32x4:
246+
{
247+
if (varTypeIsLong(type))
248+
{
249+
evexIns = INS_vinserti64x2;
250+
}
251+
break;
252+
}
253+
254+
default:
255+
{
256+
break;
178257
}
179-
break;
180258
}
181259

182-
default:
260+
if ((evexIns != ins) && comp->canUseEvexEncoding())
183261
{
184-
break;
262+
ins = evexIns;
185263
}
186264
}
187-
188-
if ((evexIns != ins) && (comp != nullptr) && comp->canUseEvexEncoding())
189-
{
190-
ins = evexIns;
191-
}
192265
#endif // TARGET_XARCH
193266

194267
return ins;

src/coreclr/jit/hwintrinsic.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -615,7 +615,7 @@ struct HWIntrinsicInfo
615615

616616
static instruction lookupIns(NamedIntrinsic id, var_types type, Compiler* comp);
617617

618-
static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode, Compiler* comp)
618+
static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode)
619619
{
620620
assert(intrinsicNode != nullptr);
621621

@@ -631,7 +631,7 @@ struct HWIntrinsicInfo
631631
type = intrinsicNode->GetSimdBaseType();
632632
}
633633

634-
return lookupIns(intrinsic, type, comp);
634+
return lookupIns(intrinsic, type, nullptr);
635635
}
636636

637637
static HWIntrinsicCategory lookupCategory(NamedIntrinsic id)

0 commit comments

Comments
 (0)