diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index c3fe46055ee196..7f0c53addd6b3b 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -60,6 +60,9 @@ INST5(pop, "pop", IUM_WR, 0x00008E, BAD_CODE, INST5(push_hide, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, ILLEGAL, ILLEGAL, INS_TT_NONE, Encoding_REX2) INST5(pop_hide, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, ILLEGAL, ILLEGAL, INS_TT_NONE, Encoding_REX2) +INST5(push2, "push2", IUM_RD, 0x0030FF, BAD_CODE, 0x0030FF, BAD_CODE, 0x0030FF, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_Flags_Has_NDD) +INST5(pop2, "pop2", IUM_WR, 0x00008F, BAD_CODE, 0x00008F, BAD_CODE, 0x00008F, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_Flags_Has_NDD) + INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, 0x0000FE, BAD_CODE, 0x000040, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Encoding_REX2 | INS_Flags_Has_NF) INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, 0x0008FE, BAD_CODE, 0x000048, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF) @@ -215,266 +218,266 @@ INSTMUL(imul_31, "imul", IUM_RD, BAD_CODE, 0xD54400003868 #define FIRST_SSE_INSTRUCTION INS_addpd // Instructions for SSE, SSE2 -INST3(addpd, "addpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles -INST3(addps, "addps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles -INST3(addsd, "addsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles -INST3(addss, "addss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles -INST3(andnpd, "andnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles -INST3(andnps, "andnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles -INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles -INST3(andps, "andps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles -INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles -INST3(cmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles -INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles -INST3(cmpss, 
"cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles -INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare doubles -INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare singles -INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to doubles -INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to singles -INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to DWORDs -INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to singles -INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to DWORDs -INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to doubles -INST3(cvtsd2si32, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), 7C, 1C, 
INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD -INST3(cvtsd2si64, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD -INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles -INST3(cvtsi2sd32, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double -INST3(cvtsi2sd64, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar double -INST3(cvtsi2ss32, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single -INST3(cvtsi2ss64, "cvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar single -INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles -INST3(cvtss2si32, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD -INST3(cvtss2si64, "cvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD/QWORD -INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed doubles to DWORDs -INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed singles to DWORDs -INST3(cvttsd2si32, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs -INST3(cvttsd2si64, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs -INST3(cvttss2si32, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD -INST3(cvttss2si64, "cvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD -INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), 13C, 4C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles -INST3(divps, "divps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), 11C, 3C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles -INST3(divsd, "divsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), 13C, 4C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles -INST3(divss, "divss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), 11C, 3C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles +INST3(addpd, "vaddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles +INST3(addps, "vaddps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles +INST3(addsd, "vaddsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles +INST3(addss, "vaddss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles +INST3(andnpd, "vandnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles +INST3(andnps, "vandnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles +INST3(andpd, "vandpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles +INST3(andps, "vandps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | 
KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles +INST3(cmppd, "vcmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles +INST3(cmpps, "vcmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles +INST3(cmpsd, "vcmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles +INST3(cmpss, "vcmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles +INST3(comisd, "vcomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare doubles +INST3(comiss, "vcomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare singles +INST3(cvtdq2pd, "vcvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to doubles +INST3(cvtdq2ps, "vcvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to singles +INST3(cvtpd2dq, "vcvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to DWORDs 
+INST3(cvtpd2ps, "vcvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to singles +INST3(cvtps2dq, "vcvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to DWORDs +INST3(cvtps2pd, "vcvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to doubles +INST3(cvtsd2si32, "vcvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD +INST3(cvtsd2si64, "vcvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar double to QWORD +INST3(cvtsd2ss, "vcvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles +INST3(cvtsi2sd32, "vcvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double +INST3(cvtsi2sd64, "vcvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar double +INST3(cvtsi2ss32, "vcvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar single +INST3(cvtsi2ss64, "vcvtsi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2A), 7C, 
1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar single +INST3(cvtss2sd, "vcvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles +INST3(cvtss2si32, "vcvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to DWORD +INST3(cvtss2si64, "vcvtss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt scalar single to QWORD +INST3(cvttpd2dq, "vcvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed doubles to DWORDs +INST3(cvttps2dq, "vcvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed singles to DWORDs +INST3(cvttsd2si32, "vcvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs +INST3(cvttsd2si64, "vcvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed QWORDs +INST3(cvttss2si32, "vcvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to DWORD +INST3(cvttss2si64, "vcvttss2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | 
Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar single to QWORD +INST3(divpd, "vdivpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), 13C, 4C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed doubles +INST3(divps, "vdivps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), 11C, 3C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles +INST3(divsd, "vdivsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), 13C, 4C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles +INST3(divss, "vdivss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), 11C, 3C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, ZERO, 4C, INS_TT_NONE, REX_WIG) -INST3(maskmovdqu, "maskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), 400C, 6C, INS_TT_NONE, REX_WIG) -INST3(maxpd, "maxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles -INST3(maxps, "maxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles -INST3(maxsd, "maxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double -INST3(maxss, "maxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 
| REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single +INST3(maskmovdqu, "vmaskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), 400C, 6C, INS_TT_NONE, REX_WIG | Encoding_VEX) +INST3(maxpd, "vmaxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles +INST3(maxps, "vmaxps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5F), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed singles +INST3(maxsd, "vmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5F), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar double +INST3(maxss, "vmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5F), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum scalar single INST3(mfence, "mfence", IUM_RD, 0x000FF0AE, BAD_CODE, BAD_CODE, ZERO, 33C, INS_TT_NONE, REX_WIG) -INST3(minpd, "minpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles -INST3(minps, "minps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles -INST3(minsd, "minsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double -INST3(minss, "minss", IUM_WR, 
BAD_CODE, BAD_CODE, SSEFLT(0x5D), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single -INST3(movapd, "movapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movaps, "movaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movd32, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move DWORD between xmm regs <-> memory/r32 regs -INST3(movd64, "movq", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move QWORD between xmm regs <-> memory/r64 regs -INST3(movdqa32, "movdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) -INST3(movdqu32, "movdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) -INST3(movhlps, "movhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), 1C, 1C, INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(movhpd, "movhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movhps, "movhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movlhps, "movlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), 1C, 1C, INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(movlpd, "movlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movlps, "movlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. -INST3(movmskps, "movmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) -INST3(movntdq, "movntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movnti32, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_REX2) -INST3(movnti64, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_REX2) -INST3(movntpd, "movntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movntps, "movntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move Quadword between memory/mm <-> regs -INST3(movsd_simd, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movss, "movss", IUM_WR, SSEFLT(0x11), 
BAD_CODE, SSEFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) -INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(movups, "movups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) -INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles -INST3(mulps, "mulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles -INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles -INST3(mulss, "mulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single -INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles -INST3(orps, "orps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles -INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation -INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation -INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation -INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers -INST3(paddd, "paddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers -INST3(paddq, "paddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers -INST3(paddsb, "paddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results -INST3(paddsw, "paddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results -INST3(paddusb, "paddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add 
packed unsigned byte integers and saturate the results -INST3(paddusw, "paddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results -INST3(paddw, "paddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers -INST3(pandd, "pand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs -INST3(pandnd, "pandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs -INST3(pavgb, "pavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers -INST3(pavgw, "pavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers -INST3(pcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality -INST3(pcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality -INST3(pcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality -INST3(pcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than -INST3(pcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than -INST3(pcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than -INST3(pextrw, "pextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 16-bit value into a r32 with zero extended to 32-bits -INST3(pinsrw, "pinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index -INST3(pmaddwd, "pmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. 
Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst -INST3(pmaxsw, "pmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words -INST3(pmaxub, "pmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes -INST3(pminsw, "pminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words -INST3(pminub, "pminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes -INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Move the MSB bits of all bytes in a xmm reg to an int reg -INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers -INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers -INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result -INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), 5C, 2X, INS_TT_FULL, Input_32Bit | 
KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result -INST3(pord, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs -INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) -INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) -INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) -INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) -INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), 3C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers -INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed shuffle of 32-bit integers -INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. -INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. 
-INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers -INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes -INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers -INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers -INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers -INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers -INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers -INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes -INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers -INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers -INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers -INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers -INST3(psubq, "psubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers -INST3(psubsb, "psubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation -INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation -INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, 
PCKDBL(0xD8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation -INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation -INST3(psubw, "psubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers -INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) -INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) -INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) 
-INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) -INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) -INST3(pxord, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs -INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal of packed singles -INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single -INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles -INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single +INST3(minpd, "vminpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5D), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed doubles +INST3(minps, "vminps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5D), 4C, 1C, INS_TT_FULL, Input_32Bit | 
KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum packed singles +INST3(minsd, "vminsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5D), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar double +INST3(minss, "vminss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5D), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Minimum scalar single +INST3(movapd, "vmovapd", IUM_WR, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movaps, "vmovaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movd32, "vmovd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move DWORD between xmm regs <-> memory/r32 regs +INST3(movd64, "vmovq", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move QWORD between xmm regs <-> memory/r64 regs +INST3(movdqa32, "vmovdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) +INST3(movdqu32, "vmovdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) +INST3(movhlps, "vmovhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), 1C, 1C, INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(movhpd, "vmovhpd", IUM_WR, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, 
Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movhps, "vmovhps", IUM_WR, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movlhps, "vmovlhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x16), 1C, 1C, INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(movlpd, "vmovlpd", IUM_WR, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movlps, "vmovlps", IUM_WR, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movmskpd, "vmovmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. 
+INST3(movmskps, "vmovmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) +INST3(movntdq, "vmovntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movnti32, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_REX2) +INST3(movnti64, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_REX2) +INST3(movntpd, "vmovntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movntps, "vmovntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movq, "vmovq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move Quadword between memory/mm <-> regs +INST3(movsd_simd, "vmovsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movss, "vmovss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) +INST3(movupd, "vmovupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(movups, "vmovups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) +INST3(mulpd, "vmulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles 
+INST3(mulps, "vmulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles +INST3(mulsd, "vmulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles +INST3(mulss, "vmulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single +INST3(orpd, "vorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles +INST3(orps, "vorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles +INST3(packssdw, "vpackssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation +INST3(packsswb, "vpacksswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation +INST3(packuswb, "vpackuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation +INST3(paddb, "vpaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | 
Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers +INST3(paddd, "vpaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers +INST3(paddq, "vpaddq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed quad-word (64-bit) integers +INST3(paddsb, "vpaddsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed byte integers and saturate the results +INST3(paddsw, "vpaddsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xED), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed signed word integers and saturate the results +INST3(paddusb, "vpaddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results +INST3(paddusw, "vpaddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results +INST3(paddw, "vpaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers +INST3(pandd, "vpand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | 
Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(pandnd, "vpandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(pavgb, "vpavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers +INST3(pavgw, "vpavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers +INST3(pcmpeqb, "vpcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality +INST3(pcmpeqd, "vpcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality +INST3(pcmpeqw, "vpcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality +INST3(pcmpgtb, "vpcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than +INST3(pcmpgtd, "vpcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than +INST3(pcmpgtw, "vpcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed 
compare 16-bit signed integers for greater than +INST3(pextrw, "vpextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 16-bit value into a r32 with zero extended to 32-bits +INST3(pinsrw, "vpinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index +INST3(pmaddwd, "vpmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst +INST3(pmaxsw, "vpmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words +INST3(pmaxub, "vpmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes +INST3(pminsw, "vpminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words +INST3(pminub, "vpminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes +INST3(pmovmskb, "vpmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Move the MSB bits of all bytes in a xmm reg to an int reg +INST3(pmulhuw, "vpmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), 5C, 
2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers +INST3(pmulhw, "vpmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers +INST3(pmullw, "vpmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result +INST3(pmuludq, "vpmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result +INST3(pord, "vpor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(prefetchnta, "prefetchnta", IUM_RD, 0x000F0018, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) +INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) +INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) +INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2) +INST3(psadbw, "vpsadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), 3C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers 
+INST3(pshufd, "vpshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed shuffle of 32-bit integers +INST3(pshufhw, "vpshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. +INST3(pshuflw, "vpshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. +INST3(pslld, "vpslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers +INST3(pslldq, "vpslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes +INST3(psllq, "vpsllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers +INST3(psllw, "vpsllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers +INST3(psrad, "vpsrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 
32-bit integers +INST3(psraw, "vpsraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers +INST3(psrld, "vpsrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers +INST3(psrldq, "vpsrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes +INST3(psrlq, "vpsrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers +INST3(psrlw, "vpsrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM | INS_TT_MEM128, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers +INST3(psubb, "vpsubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed byte (8-bit) integers +INST3(psubd, "vpsubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed double-word (32-bit) integers +INST3(psubq, "vpsubq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // subtract packed quad-word (64-bit) integers +INST3(psubsb, "vpsubsb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation +INST3(psubsw, "vpsubsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation +INST3(psubusb, "vpsubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation +INST3(psubusw, "vpsubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation +INST3(psubw, "vpsubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers +INST3(punpckhbw, "vpunpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(punpckhdq, "vpunpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(punpckhqdq, "vpunpckhqdq", IUM_WR, 
BAD_CODE, BAD_CODE, PCKDBL(0x6D), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) +INST3(punpckhwd, "vpunpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) +INST3(punpcklbw, "vpunpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) +INST3(punpckldq, "vpunpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(punpcklqdq, "vpunpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) +INST3(punpcklwd, "vpunpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) +INST3(pxord, "vpxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(rcpps, "vrcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal of packed singles +INST3(rcpss, "vrcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // 
Reciprocal of scalar single +INST3(rsqrtps, "vrsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles +INST3(rsqrtss, "vrsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, ZERO, 6C, INS_TT_NONE, REX_WIG) -INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), 13C, 4C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed doubles -INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), 12C, 3C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed singles -INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), 13C, 4C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double -INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), 12C, 3C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single -INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed doubles -INST3(subps, "subps", IUM_WR, BAD_CODE, 
BAD_CODE, PCKFLT(0x5C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles -INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles -INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles -INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare doubles -INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare singles -INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | 
REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles -INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles +INST3(shufpd, "vshufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(shufps, "vshufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(sqrtpd, "vsqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), 13C, 4C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed doubles +INST3(sqrtps, "vsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), 12C, 3C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed singles +INST3(sqrtsd, "vsqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), 13C, 4C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double +INST3(sqrtss, "vsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), 12C, 3C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single +INST3(subpd, "vsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed 
doubles +INST3(subps, "vsubps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed singles +INST3(subsd, "vsubsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles +INST3(subss, "vsubss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles +INST3(ucomisd, "vucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare doubles +INST3(ucomiss, "vucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare singles +INST3(unpckhpd, "vunpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave high packed doubles +INST3(unpckhps, "vunpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(unpcklpd, "vunpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Unpack and interleave low packed doubles +INST3(unpcklps, "vunpcklps", IUM_WR, BAD_CODE, BAD_CODE, 
PCKFLT(0x14), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(xorpd, "vxorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles +INST3(xorps, "vxorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles // Instructions for SSE3, SSSE3, SSE41, SSE42, POPCNT -INST3(addsubpd, "addsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles -INST3(addsubps, "addsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles -INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values -INST3(blendps, "blendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values +INST3(addsubpd, "vaddsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles +INST3(addsubps, "vaddsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles +INST3(blendpd, "vblendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values +INST3(blendps, "vblendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Doubles INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Singles -INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), 9C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs -INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), 13C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs -INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values -INST3(haddpd, "haddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles -INST3(haddps, "haddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats -INST3(hsubpd, "hsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles -INST3(hsubps, "hsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats -INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), 
1C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value -INST3(lddqu, "lddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Load Unaligned integer -INST3(movddup, "movddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_MOVDDUP, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values -INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint -INST3(movshdup, "movshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values -INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values -INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 4C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference -INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes -INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit integers -INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers -INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), 1C, 1C, INS_TT_FULL, Input_32Bit | 
KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation -INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right +INST3(dppd, "vdppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), 9C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs +INST3(dpps, "vdpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), 13C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs +INST3(extractps, "vextractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values +INST3(haddpd, "vhaddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles +INST3(haddps, "vhaddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats +INST3(hsubpd, "vhsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles +INST3(hsubps, "vhsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats +INST3(insertps, "vinsertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), 1C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value +INST3(lddqu, "vlddqu", IUM_WR, BAD_CODE, 
BAD_CODE, SSEDBL(0xF0), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Load Unaligned integer +INST3(movddup, "vmovddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_MOVDDUP, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values +INST3(movntdqa, "vmovntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint +INST3(movshdup, "vmovshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values +INST3(movsldup, "vmovsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values +INST3(mpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 4C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference +INST3(pabsb, "vpabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes +INST3(pabsd, "vpabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit integers +INST3(pabsw, "vpabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers +INST3(packusdw, "vpackusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation +INST3(palignr, "vpalignr", IUM_WR, BAD_CODE, 
BAD_CODE, SSE3A(0x0F), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Bytes -INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words -INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(pcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), 3C, 1C, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(pextrb, "pextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte -INST3(pextrd, "pextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword -INST3(pextrq, "pextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword -INST3(pextrw_sse42, "pextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word -INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add -INST3(phaddsw, "phaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation -INST3(phaddw, "phaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), 3C, 
2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers -INST3(phminposuw, "phminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum -INST3(phsubd, "phsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers -INST3(phsubsw, "phsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation -INST3(phsubw, "phsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers -INST3(pinsrb, "pinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte -INST3(pinsrd, "pinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword -INST3(pinsrq, "pinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword -INST3(pmaddubsw, "pmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes -INST3(pmaxsb, "pmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed 
bytes -INST3(pmaxsd, "pmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers -INST3(pmaxud, "pmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers -INST3(pmaxuw, "pmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers -INST3(pminsb, "pminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes -INST3(pminsd, "pminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers -INST3(pminud, "pminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers -INST3(pminuw, "pminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers -INST3(pmovsxbd, "pmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int -INST3(pmovsxbq, "pmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | 
Encoding_EVEX) // Packed sign extend byte to long -INST3(pmovsxbw, "pmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short -INST3(pmovsxdq, "pmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long -INST3(pmovsxwd, "pmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int -INST3(pmovsxwq, "pmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long -INST3(pmovzxbd, "pmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to intg -INST3(pmovzxbq, "pmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to lon -INST3(pmovzxbw, "pmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short -INST3(pmovzxdq, "pmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long -INST3(pmovzxwd, "pmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int -INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, 
SSE38(0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long -INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result -INST3(pmulhrsw, "pmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale -INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 10C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result -INST3(pshufb, "pshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes -INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -INST3(ptest, "ptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare -INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | 
Encoding_VEX | Encoding_EVEX) // Round packed double precision floating-point values -INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed single precision floating-point values -INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values -INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values +INST3(pblendw, "vpblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words +INST3(pcmpeqq, "vpcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality +INST3(pcmpgtq, "vpcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), 3C, 1C, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than +INST3(pextrb, "vpextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte +INST3(pextrd, "vpextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword +INST3(pextrq, "vpextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword +INST3(pextrw_sse42, "vpextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, 4C, 1C, 
INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word +INST3(phaddd, "vphaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add +INST3(phaddsw, "vphaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation +INST3(phaddw, "vphaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers +INST3(phminposuw, "vphminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum +INST3(phsubd, "vphsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers +INST3(phsubsw, "vphsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation +INST3(phsubw, "vphsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers +INST3(pinsrb, "vpinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte +INST3(pinsrd, "vpinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword +INST3(pinsrq, "vpinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword +INST3(pmaddubsw, "vpmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes +INST3(pmaxsb, "vpmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes +INST3(pmaxsd, "vpmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers +INST3(pmaxud, "vpmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers +INST3(pmaxuw, "vpmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers +INST3(pminsb, "vpminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes +INST3(pminsd, "vpminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers +INST3(pminud, "vpminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers 
+INST3(pminuw, "vpminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers +INST3(pmovsxbd, "vpmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int +INST3(pmovsxbq, "vpmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to long +INST3(pmovsxbw, "vpmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short +INST3(pmovsxdq, "vpmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long +INST3(pmovsxwd, "vpmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int +INST3(pmovsxwq, "vpmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long +INST3(pmovzxbd, "vpmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to int +INST3(pmovzxbq, "vpmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to long +INST3(pmovzxbw, "vpmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), ILLEGAL, ILLEGAL, 
INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short +INST3(pmovzxdq, "vpmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long +INST3(pmovzxwd, "vpmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int +INST3(pmovzxwq, "vpmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long +INST3(pmuldq, "vpmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result +INST3(pmulhrsw, "vpmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale +INST3(pmulld, "vpmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 10C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result +INST3(pshufb, "vpshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes +INST3(psignb, "vpsignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN +INST3(psignd, "vpsignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), 1C, 2X, 
INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN +INST3(psignw, "vpsignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN +INST3(ptest, "vptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare +INST3(roundpd, "vroundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed double precision floating-point values +INST3(roundps, "vroundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed single precision floating-point values +INST3(roundsd, "vroundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values +INST3(roundss, "vroundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values // Instructions for AESNI, PCLMULQDQ -INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow -INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow -INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | 
Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow -INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow -INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), 8C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation -INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), 7C, 13C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist -INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), 7C, 1C, INS_TT_FULL_MEM, KMask_Base1 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(aesdec, "vaesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow +INST3(aesdeclast, "vaesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow +INST3(aesenc, "vaesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow +INST3(aesenclast, "vaesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow +INST3(aesimc, "vaesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), 8C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation +INST3(aeskeygenassist, "vaeskeygenassist", IUM_WR, BAD_CODE, 
BAD_CODE, SSE3A(0xDF), 7C, 13C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist +INST3(pclmulqdq, "vpclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), 7C, 1C, INS_TT_FULL_MEM, KMask_Base1 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords // Instructions for SHA INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords @@ -486,126 +489,126 @@ INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform Two Rounds of SHA256 Operation // Instructions for GFNI -INST3(gf2p8affineinvqb, "gf2p8affineinvqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation Inverse -INST3(gf2p8affineqb, "gf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation -INST3(gf2p8mulb, "gf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), 5C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes +INST3(gf2p8affineinvqb, "vgf2p8affineinvqb",IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation Inverse +INST3(gf2p8affineqb, "vgf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation +INST3(gf2p8mulb, "vgf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), 5C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes #define LAST_SSE_INSTRUCTION INS_gf2p8mulb #define FIRST_AVX_INSTRUCTION INS_vblendvpd // Instructions for AVX -INST3(vblendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), 2C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles -INST3(vblendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), 2C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles -INST3(vbroadcastf32x4, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), ILLEGAL, ILLEGAL, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed float values read from memory to entire ymm register -INST3(vbroadcastsd, "broadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register -INST3(vbroadcastss, "broadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register -INST3(vextractf32x4, "extractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed floating point values -INST3(vinsertf32x4, "insertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed 
floating point values -INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores -INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores -INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), 2C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes -INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), 3C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values -INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values -INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values -INST3(vtestpd, "testpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), 
ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test -INST3(vtestps, "testps", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test -INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, ZERO, 1C, INS_TT_NONE, REX_WIG | Encoding_VEX) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) +INST3(vblendvpd, "vblendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4B), 2C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Doubles +INST3(vblendvps, "vblendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4A), 2C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Singles +INST3(vbroadcastf32x4, "vbroadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), ILLEGAL, ILLEGAL, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed float values read from memory to entire ymm register +INST3(vbroadcastsd, "vbroadcastsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register +INST3(vbroadcastss, "vbroadcastss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x18), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast float value read from memory to entire ymm register +INST3(vextractf32x4, "vextractf128", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed floating point values +INST3(vinsertf32x4, "vinsertf128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), 3C, 
1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed floating point values +INST3(vmaskmovpd, "vmaskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores +INST3(vmaskmovps, "vmaskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores +INST3(vpblendvb, "vpblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), 2C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes +INST3(vperm2f128, "vperm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), 3C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values +INST3(vpermilpd, "vpermilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values +INST3(vpermilpdvar, "vpermilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values +INST3(vpermilps, "vpermilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values +INST3(vpermilpsvar, "vpermilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values +INST3(vtestpd, "vtestpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test +INST3(vtestps, "vtestps", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test +INST3(vzeroupper, "vzeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, ZERO, 1C, INS_TT_NONE, REX_WIG | Encoding_VEX) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) // Instructions for AVX2, BMI1, BMI2, F16C, LZCNT, MOVBE -INST3(vbroadcasti32x4, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast packed integer values read from memory to entire ymm register -INST3(vcvtph2ps, "cvtph2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Packed FP16 Values to Single Precision Floating-Point Values -INST3(vcvtps2ph, "cvtps2ph", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1D), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Single Precision FP Value to 16-bit FP Value -INST3(vextracti32x4, "extracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed integer values -INST3(vgatherdpd, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices -INST3(vgatherdps, 
"gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices -INST3(vgatherqpd, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices -INST3(vgatherqps, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices -INST3(vinserti32x4, "inserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed integer values -INST3(vpblendd, "pblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs -INST3(vpbroadcastb, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int8 value from reg/memory to entire ymm register -INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int32 value from reg/memory to entire ymm register -INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register -INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register -INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), 3C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register -INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements -INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute Double-Precision Floating-Point Values -INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements -INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute 64-bit of input register -INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword -INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices -INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword -INST3(vpgatherqq, 
"pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword with Signed Dword Indices -INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores -INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores -INST3(vpsllvd, "psllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical -INST3(vpsllvq, "psllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical -INST3(vpsravd, "psravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic -INST3(vpsrlvd, "psrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical -INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical +INST3(vbroadcasti32x4, "vbroadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | 
Encoding_VEX | Encoding_EVEX) // Broadcast packed integer values read from memory to entire ymm register +INST3(vcvtph2ps, "vcvtph2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Packed FP16 Values to Single Precision Floating-Point Values +INST3(vcvtps2ph, "vcvtps2ph", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1D), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Convert Single Precision FP Value to 16-bit FP Value +INST3(vextracti32x4, "vextracti128", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 128-bit packed integer values +INST3(vgatherdpd, "vgatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices +INST3(vgatherdps, "vgatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices +INST3(vgatherqpd, "vgatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices +INST3(vgatherqps, "vgatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices +INST3(vinserti32x4, "vinserti128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), 3C, 1C, INS_TT_TUPLE4, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit 
packed integer values +INST3(vpblendd, "vpblendd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x02), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed DWORDs +INST3(vpbroadcastb, "vpbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int8 value from reg/memory to entire ymm register +INST3(vpbroadcastd, "vpbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x58), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int32 value from reg/memory to entire ymm register +INST3(vpbroadcastq, "vpbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register +INST3(vpbroadcastw, "vpbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register +INST3(vperm2i128, "vperm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), 3C, 1C, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register +INST3(vpermd, "vpermd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements +INST3(vpermpd, "vpermpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute Double-Precision Floating-Point Values +INST3(vpermps, "vpermps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements +INST3(vpermq, "vpermq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute 64-bit of input register +INST3(vpgatherdd, "vpgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword Indices +INST3(vpgatherdq, "vpgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices +INST3(vpgatherqd, "vpgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword Indices +INST3(vpgatherqq, "vpgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices +INST3(vpmaskmovd, "vpmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores +INST3(vpmaskmovq, "vpmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores +INST3(vpsllvd, "vpsllvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical 
+INST3(vpsllvq, "vpsllvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x47), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical +INST3(vpsravd, "vpsravd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic +INST3(vpsrlvd, "vpsrlvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical +INST3(vpsrlvq, "vpsrlvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x45), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical #define FIRST_FMA_INSTRUCTION INS_vfmadd132pd // id nm um mr mi rm lat tp tt flags -INST3(vfmadd132pd, "fmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfmadd213pd, "fmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231pd, "fmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd132ps, "fmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfmadd213ps, "fmadd213ps", IUM_RW, 
BAD_CODE, BAD_CODE, SSE38(0xA8), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231ps, "fmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd132sd, "fmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values -INST3(vfmadd213sd, "fmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231sd, "fmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd132ss, "fmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values -INST3(vfmadd213ss, "fmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231ss, "fmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused 
Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values -INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_RW, BAD_CODE, 
BAD_CODE, SSE38(0x97), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values -INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132pd, "fmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmsub213pd, "fmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231pd, "fmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132ps, "fmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmsub213ps, "fmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231ps, "fmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132sd, "fmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values -INST3(vfmsub213sd, "fmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231sd, "fmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132ss, "fmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values -INST3(vfmsub213ss, "fmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231ss, "fmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132pd, "fnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfnmadd213pd, "fnmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231pd, "fnmadd231pd", IUM_RW, 
BAD_CODE, BAD_CODE, SSE38(0xBC), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132ps, "fnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfnmadd213ps, "fnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231ps, "fnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132sd, "fnmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values -INST3(vfnmadd213sd, "fnmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231sd, "fnmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132ss, "fnmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values -INST3(vfnmadd213ss, "fnmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | 
KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231ss, "fnmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132pd, "fnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfnmsub213pd, "fnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231pd, "fnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132ps, "fnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfnmsub213ps, "fnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231ps, "fnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132sd, "fnmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point 
Values -INST3(vfnmsub213sd, "fnmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231sd, "fnmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132ss, "fnmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values -INST3(vfnmsub213ss, "fnmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231ss, "fnmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132pd, "vfmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfmadd213pd, "vfmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231pd, "vfmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132ps, "vfmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfmadd213ps, "vfmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231ps, "vfmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132sd, "vfmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values +INST3(vfmadd213sd, "vfmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231sd, "vfmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132ss, "vfmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values +INST3(vfmadd213ss, "vfmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd231ss, "vfmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub132pd, "vfmaddsub132pd", 
IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmaddsub213pd, "vfmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub231pd, "vfmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub132ps, "vfmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmaddsub213ps, "vfmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub231ps, "vfmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd132pd, "vfmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values +INST3(vfmsubadd213pd, "vfmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd231pd, "vfmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), 4C, 
2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd132ps, "vfmsubadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values +INST3(vfmsubadd213ps, "vfmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsubadd231ps, "vfmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132pd, "vfmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmsub213pd, "vfmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231pd, "vfmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132ps, "vfmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmsub213ps, "vfmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231ps, "vfmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132sd, "vfmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values +INST3(vfmsub213sd, "vfmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231sd, "vfmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub132ss, "vfmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values +INST3(vfmsub213ss, "vfmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmsub231ss, "vfmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132pd, "vfnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfnmadd213pd, "vfnmadd213pd", 
IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231pd, "vfnmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132ps, "vfnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfnmadd213ps, "vfnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231ps, "vfnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132sd, "vfnmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values +INST3(vfnmadd213sd, "vfnmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231sd, "vfnmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132ss, "vfnmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values +INST3(vfnmadd213ss, "vfnmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd231ss, "vfnmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132pd, "vfnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfnmsub213pd, "vfnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231pd, "vfnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132ps, "vfnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfnmsub213ps, "vfnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231ps, "vfnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132sd, "vfnmsub132sd", IUM_RW, 
BAD_CODE, BAD_CODE, SSE38(0x9F), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values +INST3(vfnmsub213sd, "vfnmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231sd, "vfnmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132ss, "vfnmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values +INST3(vfnmsub213ss, "vfnmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub231ss, "vfnmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // #define LAST_FMA_INSTRUCTION INS_vfnmsub231ss #define FIRST_BMI_INSTRUCTION INS_andn @@ -627,16 +630,16 @@ INST3(shrx, "shrx", IUM_WR, BAD_CODE, BAD_CODE, #define FIRST_AVXVNNI_INSTRUCTION INS_vpdpbusd // Instructions for AVXVNNI -INST3(vpdpbusd, "pdpbusd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes -INST3(vpdpbusds, "pdpbusds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | 
INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation -INST3(vpdpwssd, "pdpwssd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x52), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers -INST3(vpdpwssds, "pdpwssds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x53), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation +INST3(vpdpbusd, "vpdpbusd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes +INST3(vpdpbusds, "vpdpbusds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation +INST3(vpdpwssd, "vpdpwssd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x52), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers +INST3(vpdpwssds, "vpdpwssds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x53), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation #define LAST_AVXVNNI_INSTRUCTION INS_vpdpwssds #define FIRST_AVXIFMA_INSTRUCTION INS_vpmadd52huq // Instructions for AVXIFMA -INST3(vpmadd52huq, "pmadd52huq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB5), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators -INST3(vpmadd52luq, "pmadd52luq", IUM_RW, BAD_CODE, BAD_CODE, 
SSE38(0xB4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Integers and Add the Low 52-Bit Products to Qword Accumulators +INST3(vpmadd52huq, "vpmadd52huq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB5), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators +INST3(vpmadd52luq, "vpmadd52luq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Integers and Add the Low 52-Bit Products to Qword Accumulators #define LAST_AVXIFMA_INSTRUCTION INS_vpmadd52luq #define LAST_AVX_INSTRUCTION INS_vpmadd52luq @@ -654,14 +657,14 @@ INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_ INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND NOT masks INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical AND masks -INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), 3C, 1C, INS_TT_NONE, 
REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers -INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Encoding_EVEX | KInstruction) // Move from and to mask registers +INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers 
+INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register @@ -669,10 +672,10 @@ INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_ INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks INST3(korq, "korq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks -INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | 
KInstruction) // OR masks and set flags -INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical OR masks INST3(kshiftlb, "kshiftlb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), 4C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers INST3(kshiftld, "kshiftld", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), 4C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers @@ -682,10 +685,10 @@ INST3(kshiftrb, "kshiftrb", IUM_WR, BAD_CODE, BAD_ INST3(kshiftrd, "kshiftrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), 4C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers INST3(kshiftrq, "kshiftrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), 4C, 1C, INS_TT_NONE, REX_W1 | 
Encoding_VEX | KInstruction) // Shift right mask registers INST3(kshiftrw, "kshiftrw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), 4C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | 
Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags INST3(kunpckbw, "kunpckbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4B), 4C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers INST3(kunpckdq, "kunpckdq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4B), 4C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers INST3(kunpckwd, "kunpckwd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4B), 4C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Unpack for mask registers @@ -697,438 +700,436 @@ INST3(kxorb, "kxorb", IUM_WR, BAD_CODE, BAD_ INST3(kxord, "kxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks INST3(kxorq, "kxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks INST3(kxorw, "kxorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction | KInstructionWithLBit) // Bitwise logical XOR masks -INST3(valignd, "alignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align doubleword vectors -INST3(valignq, "alignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align quadword vectors -INST3(vblendmpd, "blendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float64 vectors using an OpMask control -INST3(vblendmps, "blendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), ILLEGAL, ILLEGAL, INS_TT_FULL, 
Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float32 vectors using an OpMask control -INST3(vbroadcastf32x2, "broadcastf32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcastf32x8, "broadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), ILLEGAL, ILLEGAL, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcastf64x2, "broadcastf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcastf64x4, "broadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register -INST3(vbroadcasti32x2, "broadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vbroadcasti32x8, "broadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), ILLEGAL, ILLEGAL, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vbroadcasti64x2, "broadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vbroadcasti64x4, "broadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read 
from memory to entire register -INST3(vcmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles -INST3(vcmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles -INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles -INST3(vcmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles -INST3(vcompresspd, "compresspd", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Store sparse packed doubles into dense memory -INST3(vcompressps, "compressps", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Store sparse packed singles into dense memory -INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to signed QWORDs -INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs -INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned QWORDs -INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed singles to signed QWORDs 
-INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs -INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned QWORDs -INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles -INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles -INST3(vcvtsd2usi32, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtsd2usi64, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi32, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD -INST3(vcvtss2usi64, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD -INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to signed QWORDs -INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs -INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, 
BAD_CODE, BAD_CODE, PCKDBL(0x78), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned QWORDs -INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to signed QWORDs -INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs -INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned QWORDs -INST3(vcvttsd2usi32, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD -INST3(vcvttsd2usi64, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD -INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD -INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), 8C, 1C, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD -INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles -INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles -INST3(vcvtuqq2pd, 
"cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles -INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles -INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double -INST3(vcvtusi2sd64, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double -INST3(vcvtusi2ss32, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single -INST3(vcvtusi2ss64, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to single -INST3(vdbpsadbw, "dbpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 3C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes -INST3(vexpandpd, "expandpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Load sparse packed doubles from dense memory -INST3(vexpandps, "expandps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Load sparse packed singles from dense memory -INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | 
Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextractf64x2, "extractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values -INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vextracti64x2, "extracti64x2", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vfixupimmpd, "fixupimmpd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed double-precision floating-point values -INST3(vfixupimmps, "fixupimmps", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed single-precision floating-point values -INST3(vfixupimmsd, "fixupimmsd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar double-precision floating-point value -INST3(vfixupimmss, "fixupimmss", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), 4C, 2X, INS_TT_TUPLE1_SCALAR, 
Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar single-precision floating-point value -INST3(vfpclasspd, "fpclasspd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Test Types of Packed Float64 Values -INST3(vfpclassps, "fpclassps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Test Types of Packed Float32 Values -INST3(vfpclasssd, "fpclasssd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Test Types of Scalar Float64 Values -INST3(vfpclassss, "fpclassss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar Float32 Values -INST3(vgatherdpd_msk, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices -INST3(vgatherdps_msk, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices -INST3(vgatherqpd_msk, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices -INST3(vgatherqps_msk, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices -INST3(vgetexppd, "getexppd", 
IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract exponents of packed double-precision floating-point values -INST3(vgetexpps, "getexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract exponents of packed single-precision floating-point values -INST3(vgetexpsd, "getexpsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar double-precision floating-point value -INST3(vgetexpss, "getexpss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar single-precision floating-point value -INST3(vgetmantpd, "getmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract mantissas of packed double-precision floating-point values -INST3(vgetmantps, "getmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract mantissas of packed single-precision floating-point values -INST3(vgetmantsd, "getmantsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar double-precision floating-point value -INST3(vgetmantss, "getmantss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar single-precision floating-point value -INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 
| REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinsertf64x2, "insertf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values -INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vinserti64x2, "inserti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) -INST3(vmovdqu16, "movdqu16", IUM_WR, SSEDBL(0x7F), BAD_CODE, SSEDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) -INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) -INST3(vmovdqu8, "movdqu8", IUM_WR, SSEDBL(0x7F), BAD_CODE, SSEDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX) -INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), 1C, 2X, INS_TT_FULL, Input_64Bit | 
KMask_Base2 | REX_W1 | Encoding_EVEX) // Packed absolute value of 64-bit integers -INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs -INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs -INST3(vpblendmb, "pblendmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int64 vectors using an OpMask control -INST3(vpblendmd, "pblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Byte vectors using an OpMask control -INST3(vpblendmq, "pblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int32 vectors using an OpMask control -INST3(vpblendmw, "pblendmw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Word vectors using an OpMask control -INST3(vpbroadcastb_gpr, "pbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Broadcast int8 value from gpr to entire register -INST3(vpbroadcastd_gpr, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast int32 value from gpr to entire register -INST3(vpbroadcastq_gpr, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, 
Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast int64 value from gpr to entire register -INST3(vpbroadcastw_gpr, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Broadcast int16 value from gpr to entire register -INST3(vpcmpb, "pcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpeqb, "pcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality -INST3(vpcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality -INST3(vpcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(vpcmpeqw, "pcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality -INST3(vpcmpgtb, "pcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than -INST3(vpcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 
32-bit signed integers for greater than -INST3(vpcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(vpcmpgtw, "pcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than -INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpub, "pcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpuw, "pcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcmpw, "pcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(vpcompressd, "pcompressd", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Store sparse packed doublewords into dense memory -INST3(vpcompressq, "pcompressq", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Store sparse packed quadwords into dense memory -INST3(vpconflictd, "pconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), ILLEGAL, 
ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Detect conflicts within a vector of packed dword values into dense memory/register -INST3(vpconflictq, "pconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Detect conflicts within a vector of packed qword values into dense memory/register -INST3(vpermi2d, "permi2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2pd, "permi2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2ps, "permi2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2q, "permi2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2w, "permi2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), 7C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermpd_reg, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register -INST3(vpermq_reg, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input 
register -INST3(vpermt2d, "permt2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2pd, "permt2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2ps, "permt2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2q, "permt2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2w, "permt2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), 7C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermw, "permw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), 6C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Doublewords Elements -INST3(vpexpandd, "pexpandd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Load sparse packed doublewords from dense memory -INST3(vpexpandq, "pexpandq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Load sparse packed quadwords from dense memory -INST3(vpgatherdd_msk, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed 
Dword -INST3(vpgatherdq_msk, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices -INST3(vpgatherqd_msk, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword -INST3(vpgatherqq_msk, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword with Signed Dword Indices -INST3(vplzcntd, "plzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Count the number of leading zero bits for packed dword values -INST3(vplzcntq, "plzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Count the number of leading zero bits for packed qword values -INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit signed integers -INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit unsigned integers -INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit signed integers -INST3(vpminuq, "pminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit unsigned integers -INST3(vpmovb2m, "pmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) -INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) -INST3(vpmovdb, "pmovdb", IUM_WR, PSSE38(0xF3, 0x31), BAD_CODE, PSSE38(0xF3, 0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovdw, "pmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovm2b, "pmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) -INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) -INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) -INST3(vpmovm2w, "pmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) -INST3(vpmovq2m, "pmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) -INST3(vpmovqb, "pmovqb", IUM_WR, PSSE38(0xF3, 0x32), BAD_CODE, PSSE38(0xF3, 0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovqd, "pmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovqw, "pmovqw", IUM_WR, PSSE38(0xF3, 0x34), BAD_CODE, PSSE38(0xF3, 0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovsdb, "pmovsdb", IUM_WR, PSSE38(0xF3, 0x21), BAD_CODE, PSSE38(0xF3, 0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovsdw, "pmovsdw", 
IUM_WR, PSSE38(0xF3, 0x23), BAD_CODE, PSSE38(0xF3, 0x23), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovsqb, "pmovsqb", IUM_WR, PSSE38(0xF3, 0x22), BAD_CODE, PSSE38(0xF3, 0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovsqd, "pmovsqd", IUM_WR, PSSE38(0xF3, 0x25), BAD_CODE, PSSE38(0xF3, 0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovsqw, "pmovsqw", IUM_WR, PSSE38(0xF3, 0x24), BAD_CODE, PSSE38(0xF3, 0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovswb, "pmovswb", IUM_WR, PSSE38(0xF3, 0x20), BAD_CODE, PSSE38(0xF3, 0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) -INST3(vpmovusdb, "pmovusdb", IUM_WR, PSSE38(0xF3, 0x11), BAD_CODE, PSSE38(0xF3, 0x11), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovusdw, "pmovusdw", IUM_WR, PSSE38(0xF3, 0x13), BAD_CODE, PSSE38(0xF3, 0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vpmovusqb, "pmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_CODE, PSSE38(0xF3, 0x12), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovusqd, "pmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovusqw, "pmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) -INST3(vpmovuswb, "pmovuswb", IUM_WR, PSSE38(0xF3, 0x10), BAD_CODE, PSSE38(0xF3, 0x10), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) -INST3(vpmovw2m, "pmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), 3C, 1C, INS_TT_NONE, 
REX_W1 | Encoding_EVEX) -INST3(vpmovwb, "pmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_CODE, PSSE38(0xF3, 0x30), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) -INST3(vpmullq, "pmullq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 15C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 64 bit unsigned integers and store lower 64 bits of each result -INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs -INST3(vprold, "prold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolq, "prolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolvd, "prolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolvq, "prolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprord, "prord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorq, "prorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorvd, "prorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX 
| INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorvq, "prorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vpscatterdd_msk, "pscatterdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Dword -INST3(vpscatterdq_msk, "pscatterdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword with Signed Dword Indices -INST3(vpscatterqd_msk, "pscatterqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Qword -INST3(vpscatterqq_msk, "pscatterqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Qword with Signed Dword Indices -INST3(vpsllvw, "psllvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical -INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers -INST3(vpsravq, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic -INST3(vpsravw, "psravw", 
IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic -INST3(vpsrlvw, "psrlvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical -INST3(vpternlogd, "pternlogd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic -INST3(vpternlogq, "pternlogq", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic -INST3(vptestmb, "ptestmb", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestmd, "ptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestmq, "ptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestmw, "ptestmw", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestnmb, "ptestnmb", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vptestnmd, "ptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vptestnmq, "ptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vptestnmw, "ptestnmw", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs -INST3(vrangepd, "rangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed double-precision floating-point values -INST3(vrangeps, "rangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed single-precision floating-point values -INST3(vrangesd, "rangesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar double-precision floating-point value -INST3(vrangess, "rangess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar single-precision floating-point value -INST3(vrcp14pd, "rcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute 
approximate reciprocals of packed double-precision floating-point values -INST3(vrcp14ps, "rcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of packed single-precision floating-point values -INST3(vrcp14sd, "rcp14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar double-precision floating-point value -INST3(vrcp14ss, "rcp14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar single-precision floating-point value -INST3(vreducepd, "reducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Perform a reduction transformation on packed double-precision floating-point values -INST3(vreduceps, "reduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Perform a reduction transformation on packed single-precision floating-point values -INST3(vreducesd, "reducesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar double-precision floating-point value -INST3(vreducess, "reducess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar single-precision floating-point value -INST3(vrndscalepd, "rndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, INS_TT_FULL, Input_64Bit | 
KMask_Base2 | REX_W1 | Encoding_EVEX) // Round packed double-precision floating-point values to include a given number of fraction bits -INST3(vrndscaleps, "rndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Round packed single-precision floating-point values to include a given number of fraction bits -INST3(vrndscalesd, "rndscalesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double-precision floating-point value to include a given number of fraction bits -INST3(vrndscaless, "rndscaless", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single-precision floating-point value to include a given number of fraction bits -INST3(vrsqrt14pd, "rsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed double-precision floating-point values -INST3(vrsqrt14ps, "rsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed single-precision floating-point values -INST3(vrsqrt14sd, "rsqrt14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar double-precision floating-point value -INST3(vrsqrt14ss, "rsqrt14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute 
approximate reciprocals of square roots of scalar single-precision floating-point value -INST3(vscalefpd, "scalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed double-precision floating-point values -INST3(vscalefps, "scalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed single-precision floating-point values -INST3(vscalefsd, "scalefsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar double-precision floating-point value -INST3(vscalefss, "scalefss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar single-precision floating-point value -INST3(vscatterdpd_msk, "scatterdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Dword Indices -INST3(vscatterdps_msk, "scatterdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Dword -INST3(vscatterqpd_msk, "scatterqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Dword Indices -INST3(vscatterqps_msk, "scatterqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | 
Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Qword -INST3(vshuff32x4, "shuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshuff64x2, "shuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshufi32x4, "shufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshufi64x2, "shufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity +INST3(valignd, "valignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align doubleword vectors +INST3(valignq, "valignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align quadword vectors +INST3(vblendmpd, "vblendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float64 vectors using an OpMask control +INST3(vblendmps, "vblendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float32 vectors using an OpMask control +INST3(vbroadcastf32x2, "vbroadcastf32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), ILLEGAL, 
ILLEGAL, INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcastf32x8, "vbroadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), ILLEGAL, ILLEGAL, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcastf64x2, "vbroadcastf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcastf64x4, "vbroadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register +INST3(vbroadcasti32x2, "vbroadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vbroadcasti32x8, "vbroadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), ILLEGAL, ILLEGAL, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vbroadcasti64x2, "vbroadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vbroadcasti64x4, "vbroadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), ILLEGAL, ILLEGAL, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register +INST3(vcmppd, "vcmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // 
compare packed doubles +INST3(vcmpps, "vcmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles +INST3(vcmpsd, "vcmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles +INST3(vcmpss, "vcmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles +INST3(vcompresspd, "vcompresspd", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Store sparse packed doubles into dense memory +INST3(vcompressps, "vcompressps", IUM_WR, SSE38(0x8A), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Store sparse packed singles into dense memory +INST3(vcvtpd2qq, "vcvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to signed QWORDs +INST3(vcvtpd2udq, "vcvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs +INST3(vcvtpd2uqq, "vcvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned QWORDs +INST3(vcvtps2qq, "vcvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed singles to signed QWORDs +INST3(vcvtps2udq, "vcvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs 
+INST3(vcvtps2uqq, "vcvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned QWORDs +INST3(vcvtqq2pd, "vcvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles +INST3(vcvtqq2ps, "vcvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles +INST3(vcvtsd2usi32, "vcvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt scalar double to unsigned DWORD +INST3(vcvtsd2usi64, "vcvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt scalar double to unsigned QWORD +INST3(vcvtss2usi32, "vcvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt scalar single to unsigned DWORD +INST3(vcvtss2usi64, "vcvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned QWORD +INST3(vcvttpd2qq, "vcvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to signed QWORDs +INST3(vcvttpd2udq, "vcvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs +INST3(vcvttpd2uqq, "vcvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned QWORDs +INST3(vcvttps2qq,
"vcvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to signed QWORDs +INST3(vcvttps2udq, "vcvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs +INST3(vcvttps2uqq, "vcvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned QWORDs +INST3(vcvttsd2usi32, "vcvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD +INST3(vcvttsd2usi64, "vcvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD +INST3(vcvttss2usi32, "vcvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), 7C, 1C, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD +INST3(vcvttss2usi64, "vcvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), 8C, 1C, INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned QWORD +INST3(vcvtudq2pd, "vcvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles +INST3(vcvtudq2ps, "vcvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles +INST3(vcvtuqq2pd, "vcvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed unsigned QWORDs to doubles
+INST3(vcvtuqq2ps, "vcvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt packed unsigned QWORDs to singles +INST3(vcvtusi2sd32, "vcvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double +INST3(vcvtusi2sd64, "vcvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), 5C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double +INST3(vcvtusi2ss32, "vcvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single +INST3(vcvtusi2ss64, "vcvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to single +INST3(vdbpsadbw, "vdbpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 3C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes +INST3(vexpandpd, "vexpandpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Load sparse packed doubles from dense memory +INST3(vexpandps, "vexpandps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x88), 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Load sparse packed singles from dense memory +INST3(vextractf32x8, "vextractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract 256-bit packed single-precision floating point values +INST3(vextractf64x2, "vextractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, 3C, 
1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 128-bit packed double-precision floating point values +INST3(vextractf64x4, "vextractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values +INST3(vextracti32x8, "vextracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract 256-bit packed doubleword integer values +INST3(vextracti64x2, "vextracti64x2", IUM_WR, SSE3A(0x39), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 128-bit packed quadword integer values +INST3(vextracti64x4, "vextracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values +INST3(vfixupimmpd, "vfixupimmpd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed double-precision floating-point values +INST3(vfixupimmps, "vfixupimmps", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x54), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed single-precision floating-point values +INST3(vfixupimmsd, "vfixupimmsd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar double-precision floating-point value +INST3(vfixupimmss, "vfixupimmss", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x55), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar single-precision floating-point value 
+INST3(vfpclasspd, "vfpclasspd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Test Types of Packed Float64 Values +INST3(vfpclassps, "vfpclassps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x66), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Test Types of Packed Float32 Values +INST3(vfpclasssd, "vfpclasssd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Test Types of Scalar Float64 Values +INST3(vfpclassss, "vfpclassss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x67), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar Float32 Values +INST3(vgatherdpd_msk, "vgatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices +INST3(vgatherdps_msk, "vgatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices +INST3(vgatherqpd_msk, "vgatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices +INST3(vgatherqps_msk, "vgatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices +INST3(vgetexppd, "vgetexppd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract exponents of packed 
double-precision floating-point values +INST3(vgetexpps, "vgetexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract exponents of packed single-precision floating-point values +INST3(vgetexpsd, "vgetexpsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar double-precision floating-point value +INST3(vgetexpss, "vgetexpss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar single-precision floating-point value +INST3(vgetmantpd, "vgetmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Extract mantissas of packed double-precision floating-point values +INST3(vgetmantps, "vgetmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Extract mantissas of packed single-precision floating-point values +INST3(vgetmantsd, "vgetmantsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar double-precision floating-point value +INST3(vgetmantss, "vgetmantss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar single-precision floating-point value +INST3(vinsertf32x8, "vinsertf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed single-precision floating point values 
+INST3(vinsertf64x2, "vinsertf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed double-precision floating point values +INST3(vinsertf64x4, "vinsertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values +INST3(vinserti32x8, "vinserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), 3C, 1C, INS_TT_TUPLE8, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed doubleword integer values +INST3(vinserti64x2, "vinserti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), 3C, 1C, INS_TT_TUPLE2, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 128-bit packed quadword integer values +INST3(vinserti64x4, "vinserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), 3C, 1C, INS_TT_TUPLE4, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values +INST3(vmovdqa64, "vmovdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vmovdqu16, "vmovdqu16", IUM_WR, SSEDBL(0x7F), BAD_CODE, SSEDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vmovdqu64, "vmovdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX) +INST3(vmovdqu8, "vmovdqu8", IUM_WR, SSEDBL(0x7F), BAD_CODE, SSEDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX) +INST3(vpabsq, "vpabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Packed absolute value of 64-bit integers +INST3(vpandnq, "vpandnq", IUM_WR, BAD_CODE, 
BAD_CODE, PCKDBL(0xDF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND NOT of two xmm regs +INST3(vpandq, "vpandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise AND of two xmm regs +INST3(vpblendmb, "vpblendmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Byte vectors using an OpMask control +INST3(vpblendmd, "vpblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int32 vectors using an OpMask control +INST3(vpblendmq, "vpblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int64 vectors using an OpMask control +INST3(vpblendmw, "vpblendmw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Word vectors using an OpMask control +INST3(vpbroadcastb_gpr, "vpbroadcastb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Broadcast int8 value from gpr to entire register +INST3(vpbroadcastd_gpr, "vpbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Broadcast int32 value from gpr to entire register +INST3(vpbroadcastq_gpr, "vpbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Broadcast int64 value from gpr to entire register 
+INST3(vpbroadcastw_gpr, "vpbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Broadcast int16 value from gpr to entire register +INST3(vpcmpb, "vpcmpb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpd, "vpcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpeqb, "vpcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality +INST3(vpcmpeqd, "vpcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality +INST3(vpcmpeqq, "vpcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality +INST3(vpcmpeqw, "vpcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality +INST3(vpcmpgtb, "vpcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than +INST3(vpcmpgtd, "vpcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than +INST3(vpcmpgtq, "vpcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, 
SSE38(0x37), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than +INST3(vpcmpgtw, "vpcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than +INST3(vpcmpq, "vpcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpub, "vpcmpub", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpud, "vpcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpuq, "vpcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpuw, "vpcmpuw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3E), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcmpw, "vpcmpw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3F), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(vpcompressd, "vpcompressd", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Store sparse packed doublewords into dense memory +INST3(vpcompressq, "vpcompressq", IUM_WR, SSE38(0x8B), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Store sparse packed quadwords into dense memory +INST3(vpconflictd, "vpconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Detect 
conflicts within a vector of packed dword values into dense memory/register +INST3(vpconflictq, "vpconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Detect conflicts within a vector of packed qword values into dense memory/register +INST3(vpermi2d, "vpermi2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2pd, "vpermi2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2ps, "vpermi2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x77), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2q, "vpermi2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x76), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2w, "vpermi2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), 7C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index +INST3(vpermpd_reg, "vpermpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register +INST3(vpermq_reg, "vpermq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register +INST3(vpermt2d, "vpermt2d", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), 3C, 
1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2pd, "vpermt2pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2ps, "vpermt2ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7F), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2q, "vpermt2q", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7E), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2w, "vpermt2w", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), 7C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpermw, "vpermw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), 6C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Word Elements +INST3(vpexpandd, "vpexpandd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), 6C, 2C, INS_TT_FULL_MEM, Input_32Bit | REX_W0 | Encoding_EVEX) // Load sparse packed doublewords from dense memory +INST3(vpexpandq, "vpexpandq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x89), 6C, 2C, INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) // Load sparse packed quadwords from dense memory +INST3(vpgatherdd_msk, "vpgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword Indices +INST3(vpgatherdq_msk, "vpgatherdq", IUM_WR, BAD_CODE, BAD_CODE, 
SSE38(0x90), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices +INST3(vpgatherqd_msk, "vpgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword Indices +INST3(vpgatherqq_msk, "vpgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices +INST3(vplzcntd, "vplzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Count the number of leading zero bits for packed dword values +INST3(vplzcntq, "vplzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Count the number of leading zero bits for packed qword values +INST3(vpmaxsq, "vpmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit signed integers +INST3(vpmaxuq, "vpmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 64-bit unsigned integers +INST3(vpminsq, "vpminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit signed integers +INST3(vpminuq, "vpminuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 64-bit unsigned 
integers +INST3(vpmovb2m, "vpmovb2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovd2m, "vpmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovdb, "vpmovdb", IUM_WR, PSSE38(0xF3, 0x31), BAD_CODE, PSSE38(0xF3, 0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovdw, "vpmovdw", IUM_WR, PSSE38(0xF3, 0x33), BAD_CODE, PSSE38(0xF3, 0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovm2b, "vpmovm2b", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), 3C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2d, "vpmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), 1C, 1C, INS_TT_NONE, REX_W0 | Encoding_EVEX) +INST3(vpmovm2q, "vpmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), 1C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovm2w, "vpmovm2w", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x28), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovq2m, "vpmovq2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovqb, "vpmovqb", IUM_WR, PSSE38(0xF3, 0x32), BAD_CODE, PSSE38(0xF3, 0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovqd, "vpmovqd", IUM_WR, PSSE38(0xF3, 0x35), BAD_CODE, PSSE38(0xF3, 0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovqw, "vpmovqw", IUM_WR, PSSE38(0xF3, 0x34), BAD_CODE, PSSE38(0xF3, 0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsdb, "vpmovsdb", IUM_WR, PSSE38(0xF3, 0x21), BAD_CODE, PSSE38(0xF3, 0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovsdw, "vpmovsdw", IUM_WR, PSSE38(0xF3, 0x23), BAD_CODE, PSSE38(0xF3, 0x23), 
ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqb, "vpmovsqb", IUM_WR, PSSE38(0xF3, 0x22), BAD_CODE, PSSE38(0xF3, 0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqd, "vpmovsqd", IUM_WR, PSSE38(0xF3, 0x25), BAD_CODE, PSSE38(0xF3, 0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovsqw, "vpmovsqw", IUM_WR, PSSE38(0xF3, 0x24), BAD_CODE, PSSE38(0xF3, 0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovswb, "vpmovswb", IUM_WR, PSSE38(0xF3, 0x20), BAD_CODE, PSSE38(0xF3, 0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) +INST3(vpmovusdb, "vpmovusdb", IUM_WR, PSSE38(0xF3, 0x11), BAD_CODE, PSSE38(0xF3, 0x11), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovusdw, "vpmovusdw", IUM_WR, PSSE38(0xF3, 0x13), BAD_CODE, PSSE38(0xF3, 0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqb, "vpmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_CODE, PSSE38(0xF3, 0x12), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqd, "vpmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovusqw, "vpmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_64Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) +INST3(vpmovuswb, "vpmovuswb", IUM_WR, PSSE38(0xF3, 0x10), BAD_CODE, PSSE38(0xF3, 0x10), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) +INST3(vpmovw2m, "vpmovw2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x29), 3C, 1C, INS_TT_NONE, REX_W1 | Encoding_EVEX) +INST3(vpmovwb, 
"vpmovwb", IUM_WR, PSSE38(0xF3, 0x30), BAD_CODE, PSSE38(0xF3, 0x30), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) +INST3(vpmullq, "vpmullq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 15C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 64 bit unsigned integers and store lower 64 bits of each result +INST3(vporq, "vporq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise OR of two xmm regs +INST3(vprold, "vprold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolq, "vprolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolvd, "vprolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprolvq, "vprolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left +INST3(vprord, "vprord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorq, "vprorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorvd, "vprorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vprorvq, "vprorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right +INST3(vpscatterdd_msk, "vpscatterdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Dword Indices +INST3(vpscatterdq_msk, "vpscatterdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA0), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Qword Values Using Signed Dword Indices +INST3(vpscatterqd_msk, "vpscatterqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Dword Values Using Signed Qword Indices +INST3(vpscatterqq_msk, "vpscatterqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA1), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Qword Values Using Signed Qword Indices +INST3(vpsllvw, "vpsllvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x12), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Left Logical +INST3(vpsraq, "vpsraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers +INST3(vpsravq, "vpsravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic +INST3(vpsravw, 
"vpsravw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x11), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Arithmetic +INST3(vpsrlvw, "vpsrlvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Bit Shift Right Logical +INST3(vpternlogd, "vpternlogd", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic +INST3(vpternlogq, "vpternlogq", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0x25), 1C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic +INST3(vptestmb, "vptestmb", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestmd, "vptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestmq, "vptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestmw, "vptestmw", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask +INST3(vptestnmb, "vptestnmb", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vptestnmd, "vptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), 4C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vptestnmq, "vptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vptestnmw, "vptestnmw", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), 4C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vpxorq, "vpxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed bit-wise XOR of two xmm regs +INST3(vrangepd, "vrangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed double-precision floating-point values +INST3(vrangeps, "vrangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed single-precision floating-point values +INST3(vrangesd, "vrangesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar double-precision floating-point value +INST3(vrangess, "vrangess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar single-precision floating-point value +INST3(vrcp14pd, "vrcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // 
Compute approximate reciprocals of packed double-precision floating-point values +INST3(vrcp14ps, "vrcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of packed single-precision floating-point values +INST3(vrcp14sd, "vrcp14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar double-precision floating-point value +INST3(vrcp14ss, "vrcp14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar single-precision floating-point value +INST3(vreducepd, "vreducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Perform a reduction transformation on packed double-precision floating-point values +INST3(vreduceps, "vreduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Perform a reduction transformation on packed single-precision floating-point values +INST3(vreducesd, "vreducesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar double-precision floating-point value +INST3(vreducess, "vreducess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar single-precision floating-point value +INST3(vrndscalepd, "vrndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, 
INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Round packed double-precision floating-point values to include a given number of fraction bits +INST3(vrndscaleps, "vrndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Round packed single-precision floating-point values to include a given number of fraction bits +INST3(vrndscalesd, "vrndscalesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double-precision floating-point value to include a given number of fraction bits +INST3(vrndscaless, "vrndscaless", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single-precision floating-point value to include a given number of fraction bits +INST3(vrsqrt14pd, "vrsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed double-precision floating-point values +INST3(vrsqrt14ps, "vrsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed single-precision floating-point values +INST3(vrsqrt14sd, "vrsqrt14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar double-precision floating-point value +INST3(vrsqrt14ss, "vrsqrt14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar single-precision floating-point value +INST3(vscalefpd, "vscalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed double-precision floating-point values +INST3(vscalefps, "vscalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed single-precision floating-point values +INST3(vscalefsd, "vscalefsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar double-precision floating-point value +INST3(vscalefss, "vscalefss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar single-precision floating-point value +INST3(vscatterdpd_msk, "vscatterdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Dword Indices +INST3(vscatterdps_msk, "vscatterdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Dword +INST3(vscatterqpd_msk, "vscatterqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float64 with Signed Qword Indices +INST3(vscatterqps_msk, "vscatterqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xA3), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scatter Packed Float32 Values Using Signed Qword +INST3(vshuff32x4, "vshuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity +INST3(vshuff64x2, "vshuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity +INST3(vshufi32x4, "vshufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), 3C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity +INST3(vshufi64x2, "vshufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity // Instructions for AVX512-VBMI -INST3(vpermb, "permb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), 3C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements -INST3(vpermi2b, "permi2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), 5C, 2C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index -INST3(vpermt2b, "permt2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), 5C, 2C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table -INST3(vpmultishiftqb, "pmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Select 
Packed Unaligned Bytes From Quadword Sources +INST3(vpermb, "vpermb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8D), 3C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute Packed Byte Elements +INST3(vpermi2b, "vpermi2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x75), 5C, 2C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting the Index +INST3(vpermt2b, "vpermt2b", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x7D), 5C, 2C, INS_TT_FULL_MEM, KMask_Base16 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute of Bytes from Two Tables Overwriting one Table +INST3(vpmultishiftqb, "vpmultishiftqb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x83), 3C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Select Packed Unaligned Bytes From Quadword Sources // Instructions for AVX512-BITALG, AVX512-VBMI2, AVX512-VPOPCNTDQ -INST3(vpcompressb, "pcompressb", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) // Store sparse packed bytes into dense memory -INST3(vpcompressw, "pcompressw", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX) // Store sparse packed words into dense memory -INST3(vpexpandb, "pexpandb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), 6C, 2C, INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) // Load sparse packed bytes from dense memory -INST3(vpexpandw, "pexpandw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), 6C, 2C, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX) // Load sparse packed words from dense memory -INST3(vpopcntb, "popcntb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in BYTE -INST3(vpopcntd, "popcntd", 
IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in DWORD -INST3(vpopcntq, "popcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in QWORD -INST3(vpopcntw, "popcntw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in WORD -INST3(vpshldd, "pshldd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical -INST3(vpshldq, "pshldq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical -INST3(vpshldvd, "pshldvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical -INST3(vpshldvq, "pshldvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical -INST3(vpshldvw, "pshldvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x70), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical -INST3(vpshldw, "pshldw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x70), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical -INST3(vpshrdd, "pshrdd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // 
Concatenate and Shift Packed Data Right Logical -INST3(vpshrdq, "pshrdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical -INST3(vpshrdvd, "pshrdvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical -INST3(vpshrdvq, "pshrdvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical -INST3(vpshrdvw, "pshrdvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x72), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical -INST3(vpshrdw, "pshrdw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x72), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical -INST3(vpshufbitqmb, "pshufbitqmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Shuffle Bits From Quadword Elements Using Byte Indexes Into Mask +INST3(vpcompressb, "vpcompressb", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) // Store sparse packed bytes into dense memory +INST3(vpcompressw, "vpcompressw", IUM_WR, SSE38(0x63), BAD_CODE, BAD_CODE, 6C, 2C, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX) // Store sparse packed words into dense memory +INST3(vpexpandb, "vpexpandb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), 6C, 2C, INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) // Load sparse packed bytes from dense memory +INST3(vpexpandw, "vpexpandw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x62), 6C, 2C, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | 
Encoding_EVEX) // Load sparse packed words from dense memory +INST3(vpopcntb, "vpopcntb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in BYTE +INST3(vpopcntd, "vpopcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in DWORD +INST3(vpopcntq, "vpopcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in QWORD +INST3(vpopcntw, "vpopcntw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Return the Count of Number of Bits Set to 1 in WORD +INST3(vpshldd, "vpshldd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshldq, "vpshldq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshldvd, "vpshldvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshldvq, "vpshldvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x71), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshldvw, "vpshldvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x70), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Left Logical +INST3(vpshldw, "vpshldw", IUM_WR, BAD_CODE, BAD_CODE, 
SSE3A(0x70), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Left Logical +INST3(vpshrdd, "vpshrdd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshrdq, "vpshrdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshrdvd, "vpshrdvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical +INST3(vpshrdvq, "vpshrdvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x73), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical +INST3(vpshrdvw, "vpshrdvw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x72), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Variable Shift Packed Data Right Logical +INST3(vpshrdw, "vpshrdw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x72), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W1 | Encoding_EVEX) // Concatenate and Shift Packed Data Right Logical +INST3(vpshufbitqmb, "vpshufbitqmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x8F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Shuffle Bits From Quadword Elements Using Byte Indexes Into Mask // Instructions for AVX512-BF16, AVX512-FP16 -INST3(vaddph, "addph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Add Packed FP16 Values -INST3(vaddsh, "addsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | 
KMask_Base1 | REX_W0 | Encoding_EVEX) // Add Scalar FP16 Values -INST3(vcmpph, "cmpph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0xC2), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compare Packed FP16 Values -INST3(vcmpsh, "cmpsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0xF3, 0xC2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compare Scalar FP16 Values -INST3(vcomish, "comish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Ordered FP16 Values and Set EFLAGS -INST3(vcvtdq2ph, "cvtdq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Signed DWORD Integers to Packed FP16 Values -INST3(vcvtne2ps2bf16, "cvtne2ps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x72), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Two Packed Single Data to One Packed BF16 Data -INST3(vcvtneps2bf16, "cvtneps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x72), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Two Packed Single Data to One Packed BF16 Data -INST3(vcvtpd2ph, "cvtpd2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Double Precision FP Values to Packed FP16 Values -INST3(vcvtph2dq, "cvtph2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed DWORD Integers -INST3(vcvtph2pd, "cvtph2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values 
to Packed Double Precision FP Values -INST3(vcvtph2psx, "cvtph2psx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Single Precision FP Values -INST3(vcvtph2qq, "cvtph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed QWORD Integers -INST3(vcvtph2udq, "cvtph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned DWORD Integers -INST3(vcvtph2uqq, "cvtph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned QWORD Integers -INST3(vcvtph2uw, "cvtph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned WORD Integers -INST3(vcvtph2w, "cvtph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed WORD Integers -INST3(vcvtps2phx, "cvtps2phx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vcvtqq2ph, "cvtqq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Signed QWORD Integers to Packed FP16 Values -INST3(vcvtsd2sh, "cvtsd2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Double Precision FP Value to Scalar FP16 Value -INST3(vcvtsh2sd, "cvtsh2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Double Precision FP Value -INST3(vcvtsh2si32, "cvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed DWORD Integer -INST3(vcvtsh2si64, "cvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed QWORD Integer -INST3(vcvtsh2ss, "cvtsh2ss", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Single Precision FP Value -INST3(vcvtsh2usi32, "cvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned DWORD Integer -INST3(vcvtsh2usi64, "cvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned QWORD Integer -INST3(vcvtsi2sh32, "cvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Signed DWORD Integer to Scalar FP16 Value -INST3(vcvtsi2sh64, "cvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Signed 
QWORD Integer to Scalar FP16 Value -INST3(vcvtss2sh, "cvtss2sh", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Single Precision FP Value to Scalar FP16 Value -INST3(vcvttph2dq, "cvttph2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed DWORD Integers -INST3(vcvttph2qq, "cvttph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed QWORD Integers -INST3(vcvttph2udq, "cvttph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned DWORD Integers -INST3(vcvttph2uqq, "cvttph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned QWORD Integers -INST3(vcvttph2uw, "cvttph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned WORD Integers -INST3(vcvttph2w, "cvttph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed WORD Integers -INST3(vcvttsh2si32, "cvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to 
Scalar Signed DWORD Integer -INST3(vcvttsh2si64, "cvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Signed QWORD Integer -INST3(vcvttsh2usi32, "cvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned DWORD Integer -INST3(vcvttsh2usi64, "cvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned QWORD Integer -INST3(vcvtudq2ph, "cvtudq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vcvtuqq2ph, "cvtuqq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vcvtusi2sh32, "cvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Unsigned DWORD Integer to Scalar FP16 Value -INST3(vcvtusi2sh64, "cvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Unsigned QWORD Integer to Scalar FP16 Value -INST3(vcvtuw2ph, "cvtuw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vcvtw2ph, 
"cvtw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vdivph, "divph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Divide Packed FP16 Values -INST3(vdivsh, "divsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Divide Scalar FP16 Values -INST3(vdpbf16ps, "dpbf16ps", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x52), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Dot Product of BF16 Pairs Accumulated Into Packed Single Precision -INST3(vfcmaddcph, "fcmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values -INST3(vfcmaddcsh, "fcmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values -INST3(vfcmulcph, "fcmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values -INST3(vfcmulcsh, "fcmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD7), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values -INST3(vfmadd132ph, "vfmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x98), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values -INST3(vfmadd132sh, "vfmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 
0x99), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values -INST3(vfmadd213ph, "vfmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values -INST3(vfmadd213sh, "vfmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values -INST3(vfmadd231ph, "vfmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values -INST3(vfmadd231sh, "vfmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values -INST3(vfmaddcph, "fmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values -INST3(vfmaddcsh, "fmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values -INST3(vfmaddsub132ph, "vfmaddsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x96), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values -INST3(vfmaddsub213ph, "vfmaddsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA6), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values -INST3(vfmaddsub231ph, "vfmaddsub231ph", 
IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB6), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values -INST3(vfmsub132ph, "vfmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values -INST3(vfmsub132sh, "vfmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values -INST3(vfmsub213ph, "vfmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values -INST3(vfmsub213sh, "vfmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAB), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values -INST3(vfmsub231ph, "vfmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values -INST3(vfmsub231sh, "vfmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBB), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values -INST3(vfmsubadd132ph, "vfmsubadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x97), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values -INST3(vfmsubadd213ph, "vfmsubadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA7), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused 
Multiply-Alternating Subtract/Add of Packed FP16 Values -INST3(vfmsubadd231ph, "vfmsubadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB7), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values -INST3(vfmulcph, "fmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values -INST3(vfmulcsh, "fmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD7), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values -INST3(vfnmadd132ph, "vfnmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values -INST3(vfnmadd132sh, "vfnmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values -INST3(vfnmadd213ph, "vfnmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values -INST3(vfnmadd213sh, "vfnmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAD), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values -INST3(vfnmadd231ph, "vfnmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values -INST3(vfnmadd231sh, "vfnmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBD), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, 
Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values -INST3(vfnmsub132ph, "vfnmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values -INST3(vfnmsub132sh, "vfnmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values -INST3(vfnmsub213ph, "vfnmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAE), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values -INST3(vfnmsub213sh, "vfnmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values -INST3(vfnmsub231ph, "vfnmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBE), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values -INST3(vfnmsub231sh, "vfnmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values -INST3(vfpclassph, "fpclassph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Test Types of Packed FP16 Values -INST3(vfpclasssh, "fpclasssh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x67), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar FP16 Values -INST3(vgetexpph, "getexpph", IUM_WR, BAD_CODE, BAD_CODE, 
PCKDBLMAP(0x06, 0x42), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Exponents of Packed FP16 Values to FP16 Values -INST3(vgetexpsh, "getexpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x43), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Exponents of Scalar FP16 Values to FP16 Values -INST3(vgetmantph, "getmantph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x26), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Packed FP16 Values -INST3(vgetmantsh, "getmantsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x27), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Scalar FP16 Values -INST3(vmaxph, "maxph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Maximum of Packed FP16 Values -INST3(vmaxsh, "maxsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Maximum of Scalar FP16 Values -INST3(vminph, "minph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x00, 0x5D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Minimum of Packed FP16 Values -INST3(vminsh, "minsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x00, 0x5D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Minimum of Scalar FP16 Values -INST3(vmovsh, "movsh", IUM_WR, SSEFLTMAP(0x00, 0x11), BAD_CODE, SSEFLTMAP(0x00, 0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move Scalar FP16 Value -INST3(vmovw, "movw", IUM_WR, PCKDBLMAP(0x06, 0x7E), BAD_CODE, PCKDBLMAP(0x00, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | 
REX_WIG | Encoding_EVEX) // Move Word -INST3(vmulph, "mulph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Multiply Packed FP16 Values -INST3(vmulsh, "mulsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Multiply Scalar FP16 Values -INST3(vrcpph, "rcpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Packed FP16 Values -INST3(vrcpsh, "rcpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Scalar FP16 Values -INST3(vreduceph, "reduceph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Packed FP16 Values -INST3(vreducesh, "reducesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Scalar FP16 Values -INST3(vrndscaleph, "rndscaleph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x08), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Round Packed FP16 Values to Include a Given Number of Fraction Bits -INST3(vrndscalesh, "rndscalesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x0A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Round Scalar FP16 Values to Include a Given Number of Fraction Bits -INST3(vrsqrtph, "rsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Packed 
FP16 Values -INST3(vrsqrtsh, "rsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Scalar FP16 Values -INST3(vscalefph, "scalefph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Scale Packed FP16 Values with FP16 Values -INST3(vscalefsh, "scalefsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Scale Scalar FP16 Values with FP16 Values -INST3(vsqrtph, "sqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute Square Root of Packed FP16 Values -INST3(vsqrtsh, "sqrtsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute Square Root of Scalar FP16 Values -INST3(vsubph, "subph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Subtract Packed FP16 Values -INST3(vsubsh, "subsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Subtract Scalar FP16 Values -INST3(vucomish, "ucomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Unordered FP16 Values and Set EFLAGS +INST3(vaddph, "vaddph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Add Packed FP16 Values +INST3(vaddsh, "vaddsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, 
INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Add Scalar FP16 Values +INST3(vcmpph, "vcmpph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0xC2), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compare Packed FP16 Values +INST3(vcmpsh, "vcmpsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0xF3, 0xC2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compare Scalar FP16 Values +INST3(vcomish, "vcomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Ordered FP16 Values and Set EFLAGS +INST3(vcvtdq2ph, "vcvtdq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Signed DWORD Integers to Packed FP16 Values +INST3(vcvtne2ps2bf16, "vcvtne2ps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x72), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Two Packed Single Data to One Packed BF16 Data +INST3(vcvtneps2bf16, "vcvtneps2bf16", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x72), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Data to Packed BF16 Data +INST3(vcvtpd2ph, "vcvtpd2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Double Precision FP Values to Packed FP16 Values +INST3(vcvtph2dq, "vcvtph2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed DWORD Integers +INST3(vcvtph2pd, "vcvtph2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | 
Encoding_EVEX) // Convert Packed FP16 Values to Packed Double Precision FP Values +INST3(vcvtph2psx, "vcvtph2psx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Single Precision FP Values +INST3(vcvtph2qq, "vcvtph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed QWORD Integers +INST3(vcvtph2udq, "vcvtph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned DWORD Integers +INST3(vcvtph2uqq, "vcvtph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned QWORD Integers +INST3(vcvtph2uw, "vcvtph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Unsigned WORD Integers +INST3(vcvtph2w, "vcvtph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed WORD Integers +INST3(vcvtps2phx, "vcvtps2phx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtqq2ph, "vcvtqq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Signed QWORD Integers to Packed FP16 Values +INST3(vcvtsd2sh, "vcvtsd2sh", IUM_WR, BAD_CODE, 
BAD_CODE, SSEDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Double Precision FP Value to Scalar FP16 Value +INST3(vcvtsh2sd, "vcvtsh2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Double Precision FP Value +INST3(vcvtsh2si32, "vcvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed DWORD Integer +INST3(vcvtsh2si64, "vcvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed QWORD Integer +INST3(vcvtsh2ss, "vcvtsh2ss", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Single Precision FP Value +INST3(vcvtsh2usi32, "vcvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned DWORD Integer +INST3(vcvtsh2usi64, "vcvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned QWORD Integer +INST3(vcvtsi2sh32, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Signed DWORD Integer to Scalar FP16 Value +INST3(vcvtsi2sh64, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | 
KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Signed QWORD Integer to Scalar FP16 Value +INST3(vcvtss2sh, "vcvtss2sh", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Single Precision FP Value to Scalar FP16 Value +INST3(vcvttph2dq, "vcvttph2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed DWORD Integers +INST3(vcvttph2qq, "vcvttph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed QWORD Integers +INST3(vcvttph2udq, "vcvttph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned DWORD Integers +INST3(vcvttph2uqq, "vcvttph2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned QWORD Integers +INST3(vcvttph2uw, "vcvttph2uw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x7C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned WORD Integers +INST3(vcvttph2w, "vcvttph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed WORD Integers +INST3(vcvttsh2si32, "vcvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | 
REX_W0 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Signed DWORD Integer +INST3(vcvttsh2si64, "vcvttsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Signed QWORD Integer +INST3(vcvttsh2usi32, "vcvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned DWORD Integer +INST3(vcvttsh2usi64, "vcvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned QWORD Integer +INST3(vcvtudq2ph, "vcvtudq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Unsigned DWORD Integers to Packed FP16 Values +INST3(vcvtuqq2ph, "vcvtuqq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Unsigned QWORD Integers to Packed FP16 Values +INST3(vcvtusi2sh32, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Unsigned DWORD Integer to Scalar FP16 Value +INST3(vcvtusi2sh64, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Unsigned QWORD Integer to Scalar FP16 Value +INST3(vcvtuw2ph, "vcvtuw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert 
Packed Single Precision FP Values to Packed FP16 Values +INST3(vcvtw2ph, "vcvtw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values +INST3(vdivph, "vdivph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Divide Packed FP16 Values +INST3(vdivsh, "vdivsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Divide Scalar FP16 Values +INST3(vdpbf16ps, "vdpbf16ps", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x52), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Dot Product of BF16 Pairs Accumulated Into Packed Single Precision +INST3(vfcmaddcph, "vfcmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values +INST3(vfcmaddcsh, "vfcmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values +INST3(vfcmulcph, "vfcmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values +INST3(vfcmulcsh, "vfcmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD7), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values +INST3(vfmadd132ph, "vvfmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x98), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values 
+INST3(vfmadd132sh, "vfmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x99), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmadd213ph, "vfmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd213sh, "vfmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmadd231ph, "vfmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd231sh, "vfmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmaddcph, "vfmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values +INST3(vfmaddcsh, "vfmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values +INST3(vfmaddsub132ph, "vfmaddsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x96), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmaddsub213ph, "vfmaddsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA6), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused 
Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmaddsub231ph, "vfmaddsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB6), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Add/Subtract of Packed FP16 Values +INST3(vfmsub132ph, "vfmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub132sh, "vfmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values +INST3(vfmsub213ph, "vfmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub213sh, "vfmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAB), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values +INST3(vfmsub231ph, "vfmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Packed FP16 Values +INST3(vfmsub231sh, "vfmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBB), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Subtract of Scalar FP16 Values +INST3(vfmsubadd132ph, "vfmsubadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x97), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfmsubadd213ph, "vfmsubadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA7), 
ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfmsubadd231ph, "vfmsubadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB7), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Alternating Subtract/Add of Packed FP16 Values +INST3(vfmulcph, "vfmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values +INST3(vfmulcsh, "vfmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0xD7), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values +INST3(vfnmadd132ph, "vfnmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd132sh, "vfnmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfnmadd213ph, "vfnmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd213sh, "vfnmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAD), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfnmadd231ph, "vfnmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Packed FP16 Values +INST3(vfnmadd231sh, 
"vfnmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBD), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Add of Scalar FP16 Values +INST3(vfnmsub132ph, "vfnmsub132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub132sh, "vfnmsub132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x9F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfnmsub213ph, "vfnmsub213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAE), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub213sh, "vfnmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfnmsub231ph, "vfnmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBE), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values +INST3(vfnmsub231sh, "vfnmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values +INST3(vfpclassph, "vfpclassph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Test Types of Packed FP16 Values +INST3(vfpclasssh, "vfpclasssh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x67), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | 
Encoding_EVEX) // Test Types of Scalar FP16 Values +INST3(vgetexpph, "vgetexpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x42), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Exponents of Packed FP16 Values to FP16 Values +INST3(vgetexpsh, "vgetexpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x43), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Exponents of Scalar FP16 Values to FP16 Values +INST3(vgetmantph, "vgetmantph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x26), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Packed FP16 Values +INST3(vgetmantsh, "vgetmantsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x27), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Scalar FP16 Values +INST3(vmaxph, "vmaxph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Maximum of Packed FP16 Values +INST3(vmaxsh, "vmaxsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Maximum of Scalar FP16 Values +INST3(vminph, "vminph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x00, 0x5D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Minimum of Packed FP16 Values +INST3(vminsh, "vminsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x00, 0x5D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Minimum of Scalar FP16 Values +INST3(vmovsh, "vmovsh", IUM_WR, SSEFLTMAP(0x00, 0x11), BAD_CODE, SSEFLTMAP(0x00, 0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move Scalar FP16 Value +INST3(vmovw, 
"vmovw", IUM_WR, PCKDBLMAP(0x06, 0x7E), BAD_CODE, PCKDBLMAP(0x00, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_WIG | Encoding_EVEX) // Move Word +INST3(vmulph, "vmulph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Multiply Packed FP16 Values +INST3(vmulsh, "vmulsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Multiply Scalar FP16 Values +INST3(vrcpph, "vrcpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Packed FP16 Values +INST3(vrcpsh, "vrcpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Scalar FP16 Values +INST3(vreduceph, "vreduceph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Packed FP16 Values +INST3(vreducesh, "vreducesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Scalar FP16 Values +INST3(vrndscaleph, "vrndscaleph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x08), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Round Packed FP16 Values to Include a Given Number of Fraction Bits +INST3(vrndscalesh, "vrndscalesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x0A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Round Scalar FP16 Values to Include a Given Number of Fraction Bits +INST3(vrsqrtph, "vrsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4E), 
ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Packed FP16 Values +INST3(vrsqrtsh, "vrsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Scalar FP16 Values +INST3(vscalefph, "vscalefph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Scale Packed FP16 Values with FP16 Values +INST3(vscalefsh, "vscalefsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Scale Scalar FP16 Values with FP16 Values +INST3(vsqrtph, "vsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute Square Root of Packed FP16 Values +INST3(vsqrtsh, "vsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute Square Root of Scalar FP16 Values +INST3(vsubph, "vsubph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Subtract Packed FP16 Values +INST3(vsubsh, "vsubsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Subtract Scalar FP16 Values +INST3(vucomish, "vucomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Unordered FP16 Values and Set EFLAGS // AVX512-VP2INTERSECT -INST3(vp2intersectd, "p2intersectd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | 
KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute Intersection Between DWORDS to a Pair of Mask Registers -INST3(vp2intersectq, "p2intersectq", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute Intersection Between QWORDS to a Pair of Mask Registers +INST3(vp2intersectd, "vp2intersectd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute Intersection Between DWORDS to a Pair of Mask Registers +INST3(vp2intersectq, "vp2intersectq", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute Intersection Between QWORDS to a Pair of Mask Registers // Instructions for AVX10v2 -INST3(vcomxsd, "comxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags -INST3(vcomxss, "comxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags -INST3(vcvtps2ibs, "cvtps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x69), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) -INST3(vcvtps2iubs, "cvtps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6B), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttpd2dqs, "cvttpd2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to DWORDs 
-INST3(vcvttpd2qqs, "cvttpd2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to signed QWORDs -INST3(vcvttpd2udqs, "cvttpd2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to unsigned DWORDs -INST3(vcvttpd2uqqs, "cvttpd2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to signed QWORDs -INST3(vcvttps2dqs, "cvttps2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to DWORDs -INST3(vcvttps2ibs, "cvttps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x68), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttps2iubs, "cvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttps2qqs, "cvttps2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to signed QWORDs -INST3(vcvttps2udqs, "cvttps2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned DWORDs -INST3(vcvttps2uqqs, "cvttps2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) 
// cvt with truncation/saturation packed singles to unsigned QWORDs -INST3(vcvttsd2sis32, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs -INST3(vcvttsd2sis64, "cvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORDs -INST3(vcvttsd2usis32, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned DWORD -INST3(vcvttsd2usis64, "cvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned QWORD -INST3(vcvttss2sis32, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD -INST3(vcvttss2sis64, "cvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to DWORD -INST3(vcvttss2usis32, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vcvttss2usis64, "cvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD -INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed doubles -INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed singles -INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double -INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single -INST3(vmovd_simd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs -INST3(vmovw_simd, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs -INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), 4C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source 
operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | 
Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags -INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags +INST3(vcomxsd, "vcomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags +INST3(vcomxss, "vcomxss", IUM_RD, 
BAD_CODE, BAD_CODE, SSEDBL(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags +INST3(vcvtps2ibs, "vcvtps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x69), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with saturation packed singles to signed bytes +INST3(vcvtps2iubs, "vcvtps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6B), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with saturation packed singles to unsigned bytes +INST3(vcvttpd2dqs, "vcvttpd2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to DWORDs +INST3(vcvttpd2qqs, "vcvttpd2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to signed QWORDs +INST3(vcvttpd2udqs, "vcvttpd2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to unsigned DWORDs +INST3(vcvttpd2uqqs, "vcvttpd2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation packed doubles to unsigned QWORDs +INST3(vcvttps2dqs, "vcvttps2dqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to DWORDs +INST3(vcvttps2ibs, "vcvttps2ibs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x68), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to signed bytes 
+INST3(vcvttps2iubs, "vcvttps2iubs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6A), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned bytes +INST3(vcvttps2qqs, "vcvttps2qqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6D), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to signed QWORDs +INST3(vcvttps2udqs, "vcvttps2udqs", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned DWORDs +INST3(vcvttps2uqqs, "vcvttps2uqqs", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x6C), ILLEGAL, ILLEGAL, INS_TT_HALF, Input_32Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation packed singles to unsigned QWORDs +INST3(vcvttsd2sis32, "vcvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed DWORD +INST3(vcvttsd2sis64, "vcvttsd2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to signed QWORD +INST3(vcvttsd2usis32, "vcvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned DWORD +INST3(vcvttsd2usis64, "vcvttsd2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar double to unsigned QWORD +INST3(vcvttss2sis32, "vcvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with 
truncation/saturation scalar single to DWORD +INST3(vcvttss2sis64, "vcvttss2sis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6D), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to QWORD +INST3(vcvttss2usis32, "vcvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usis64, "vcvttss2usis", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x6C), 7C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt with truncation/saturation scalar single to unsigned DWORD/QWORD +INST3(vminmaxpd, "vminmaxpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum packed doubles +INST3(vminmaxps, "vminmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum packed singles +INST3(vminmaxsd, "vminmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar double +INST3(vminmaxss, "vminmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single +INST3(vmovd_simd, "vmovd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs +INST3(vmovw_simd, "vmovw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD 
between xmm regs <-> memory/xmm regs +INST3(vmpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), 4C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference +INST3(vpdpbssd, "vpdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "vpdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "vpdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "vpdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "vpdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "vpdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpwsud, "vpdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "vpdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "vpdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "vpdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "vpdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "vpdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results 
+INST3(vucomxsd, "vucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags +INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags #define LAST_AVX512_INSTRUCTION INS_vucomxss // id nm um mr mi rm lat tp tt flags #define FIRST_APX_INSTRUCTION INS_ccmpo -INST5(push2, "push2", IUM_RD, 0x0030FF, BAD_CODE, 0x0030FF, BAD_CODE, 0x0030FF, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_Flags_Has_NDD) -INST5(pop2, "pop2", IUM_WR, 0x00008F, BAD_CODE, 0x00008F, BAD_CODE, 0x00008F, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_Flags_Has_NDD) #define FIRST_CCMP_INSTRUCTION INS_ccmpo INST3(ccmpo, "ccmpo", IUM_RD, 0x000038, 0x0003880, 0x00003A, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ccmpno, "ccmpno", IUM_RD, 0x000038, 0x0003880, 0x00003A, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)