Skip to content

Commit def360f

Browse files
authored
remove special REX.W handling from some instructions (#109257)
1 parent 42c1be2 commit def360f

File tree

2 files changed

+4
-18
lines changed

2 files changed

+4
-18
lines changed

src/coreclr/jit/emitxarch.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,20 +1587,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
15871587
return false;
15881588
}
15891589

1590-
case INS_vbroadcastsd:
1591-
case INS_vpbroadcastq:
1592-
{
1593-
// TODO-XARCH-AVX512: These use W1 if a kmask is involved
1594-
return TakesEvexPrefix(id);
1595-
}
1596-
1597-
case INS_vpermilpd:
1598-
case INS_vpermilpdvar:
1599-
{
1600-
// TODO-XARCH-AVX512: These use W1 if a kmask or broaadcast from memory is involved
1601-
return TakesEvexPrefix(id);
1602-
}
1603-
16041590
default:
16051591
{
16061592
unreached();

src/coreclr/jit/instrsxarch.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -464,16 +464,16 @@ INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE
464464
INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles
465465
INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles
466466
INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed float values read from memory to entire ymm register
467-
INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register
467+
INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register
468468
INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register
469469
INST3(vextractf128, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed floating point values
470470
INST3(vinsertf128, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE4, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values
471471
INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_TT_NONE, Input_64Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
472472
INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
473473
INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_TT_NONE, Input_8Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes
474474
INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
475-
INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
476-
INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
475+
INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
476+
INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
477477
INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
478478
INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
479479
INST3(vtestpd, "testpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_TT_NONE, Input_64Bit | REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test
@@ -491,7 +491,7 @@ INST3(vinserti128, "inserti128", IUM_WR, BAD_CODE, BAD_CODE,
491491
INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs
492492
INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int8 value from reg/memory to entire ymm register
493493
INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int32 value from reg/memory to entire ymm register
494-
INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register
494+
INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register
495495
INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register
496496
INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register
497497
INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute Packed Doublewords Elements

0 commit comments

Comments
 (0)