From 5bc2b34ca1b612c329700793671f045e35f9361e Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 25 Mar 2025 19:57:09 -0700 Subject: [PATCH 01/32] Add CPUID for AvxVnniInt8 and AvxVnniInt16 --- src/coreclr/inc/clrconfigvalues.h | 2 + src/coreclr/inc/corinfoinstructionset.h | 180 +++++++++----- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 4 + src/coreclr/jit/hwintrinsic.cpp | 6 + src/coreclr/jit/jitconfigvalues.h | 2 + .../Compiler/HardwareIntrinsicHelpers.cs | 16 ++ .../Common/Compiler/InstructionSetSupport.cs | 7 + .../tools/Common/InstructionSetHelpers.cs | 4 + .../Runtime/ReadyToRunInstructionSet.cs | 4 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 12 + .../JitInterface/CorInfoInstructionSet.cs | 226 +++++++++++++----- .../ThunkGenerator/InstructionSetDesc.txt | 10 + src/coreclr/vm/codeman.cpp | 18 ++ src/native/minipal/cpufeatures.c | 12 + src/native/minipal/cpufeatures.h | 2 + 16 files changed, 398 insertions(+), 117 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 659452eec91a2a..0276b5120c8f86 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -694,6 +694,8 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512V RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT8, W("EnableAVXVNNIINT8"), 1, "Allows AVXVNNIINT8+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT16, W("EnableAVXVNNIINT16"), 1, "Allows AVXVNNIINT16+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, 
W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index cc446a6571f586..a22dcd30f9d459 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -96,34 +96,40 @@ enum CORINFO_InstructionSet InstructionSet_GFNI=44, InstructionSet_GFNI_V256=45, InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, - InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_AVXVNNIINT8=47, + InstructionSet_AVXVNNIINT8_V512=48, + InstructionSet_AVXVNNIINT16=49, + InstructionSet_AVXVNNIINT16_V512=50, + InstructionSet_X86Base_X64=51, + InstructionSet_SSE_X64=52, + InstructionSet_SSE2_X64=53, + InstructionSet_SSE3_X64=54, + InstructionSet_SSSE3_X64=55, + InstructionSet_SSE41_X64=56, + InstructionSet_SSE42_X64=57, + InstructionSet_AVX_X64=58, + InstructionSet_AVX2_X64=59, + 
InstructionSet_AES_X64=60, + InstructionSet_BMI1_X64=61, + InstructionSet_BMI2_X64=62, + InstructionSet_FMA_X64=63, + InstructionSet_LZCNT_X64=64, + InstructionSet_PCLMULQDQ_X64=65, + InstructionSet_POPCNT_X64=66, + InstructionSet_AVXVNNI_X64=67, + InstructionSet_X86Serialize_X64=68, + InstructionSet_AVX512F_X64=69, + InstructionSet_AVX512BW_X64=70, + InstructionSet_AVX512CD_X64=71, + InstructionSet_AVX512DQ_X64=72, + InstructionSet_AVX512VBMI_X64=73, + InstructionSet_AVX10v1_X64=74, + InstructionSet_AVX10v1_V512_X64=75, + InstructionSet_AVX10v2_X64=76, + InstructionSet_AVX10v2_V512_X64=77, + InstructionSet_GFNI_X64=78, + InstructionSet_AVXVNNIINT8_X64=79, + InstructionSet_AVXVNNIINT16_X64=80, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -172,34 +178,40 @@ enum CORINFO_InstructionSet InstructionSet_GFNI=44, InstructionSet_GFNI_V256=45, InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, - InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_AVXVNNIINT8=47, + InstructionSet_AVXVNNIINT8_V512=48, + InstructionSet_AVXVNNIINT16=49, + InstructionSet_AVXVNNIINT16_V512=50, + InstructionSet_X86Base_X64=51, + 
InstructionSet_SSE_X64=52, + InstructionSet_SSE2_X64=53, + InstructionSet_SSE3_X64=54, + InstructionSet_SSSE3_X64=55, + InstructionSet_SSE41_X64=56, + InstructionSet_SSE42_X64=57, + InstructionSet_AVX_X64=58, + InstructionSet_AVX2_X64=59, + InstructionSet_AES_X64=60, + InstructionSet_BMI1_X64=61, + InstructionSet_BMI2_X64=62, + InstructionSet_FMA_X64=63, + InstructionSet_LZCNT_X64=64, + InstructionSet_PCLMULQDQ_X64=65, + InstructionSet_POPCNT_X64=66, + InstructionSet_AVXVNNI_X64=67, + InstructionSet_X86Serialize_X64=68, + InstructionSet_AVX512F_X64=69, + InstructionSet_AVX512BW_X64=70, + InstructionSet_AVX512CD_X64=71, + InstructionSet_AVX512DQ_X64=72, + InstructionSet_AVX512VBMI_X64=73, + InstructionSet_AVX10v1_X64=74, + InstructionSet_AVX10v1_V512_X64=75, + InstructionSet_AVX10v2_X64=76, + InstructionSet_AVX10v2_V512_X64=77, + InstructionSet_GFNI_X64=78, + InstructionSet_AVXVNNIINT8_X64=79, + InstructionSet_AVXVNNIINT16_X64=80, #endif // TARGET_X86 }; @@ -375,6 +387,10 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v2_V512_X64); if (HasInstructionSet(InstructionSet_GFNI)) AddInstructionSet(InstructionSet_GFNI_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT8)) + AddInstructionSet(InstructionSet_AVXVNNIINT8_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT16)) + AddInstructionSet(InstructionSet_AVXVNNIINT16_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -579,6 +595,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if 
(resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -693,6 +717,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); if 
(resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -821,6 +853,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -1061,6 +1101,18 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "GFNI_V256"; case InstructionSet_GFNI_V512 : return "GFNI_V512"; + case InstructionSet_AVXVNNIINT8 : + return "AVXVNNIINT8"; + case InstructionSet_AVXVNNIINT8_X64 : + return "AVXVNNIINT8_X64"; + 
case InstructionSet_AVXVNNIINT8_V512 : + return "AVXVNNIINT8_V512"; + case InstructionSet_AVXVNNIINT16 : + return "AVXVNNIINT16"; + case InstructionSet_AVXVNNIINT16_X64 : + return "AVXVNNIINT16_X64"; + case InstructionSet_AVXVNNIINT16_V512 : + return "AVXVNNIINT16_V512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1155,6 +1207,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "GFNI_V256"; case InstructionSet_GFNI_V512 : return "GFNI_V512"; + case InstructionSet_AVXVNNIINT8 : + return "AVXVNNIINT8"; + case InstructionSet_AVXVNNIINT8_V512 : + return "AVXVNNIINT8_V512"; + case InstructionSet_AVXVNNIINT16 : + return "AVXVNNIINT16"; + case InstructionSet_AVXVNNIINT16_V512 : + return "AVXVNNIINT16_V512"; #endif // TARGET_X86 default: @@ -1239,6 +1299,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1284,6 +1348,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; + case 
READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index db3480acadc98a..0c66d1ca9d3897 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 63dcb8b8-1f9d-43d8-bb09-bf5d8bf85ad4 */ - 0x63dcb8b8, - 0x1f9d, - 0x43d8, - {0xbb, 0x09, 0xbf, 0x5d, 0x8b, 0xf8, 0x5a, 0xd4} +constexpr GUID JITEEVersionIdentifier = { /* 2613a833-78b8-41db-9fb0-175d839f5e25 */ + 0x2613a833, + 0x78b8, + 0x41db, + {0x9f, 0xb0, 0x17, 0x5d, 0x83, 0x9f, 0x5e, 0x25} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index ea250df0125e47..a5b62354ffcc90 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -67,6 +67,10 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Zba=57, READYTORUN_INSTRUCTION_Zbb=58, READYTORUN_INSTRUCTION_Sve2=59, + READYTORUN_INSTRUCTION_AvxVnniInt8=60, + READYTORUN_INSTRUCTION_AvxVnniInt8_V512=61, + READYTORUN_INSTRUCTION_AvxVnniInt16=62, + READYTORUN_INSTRUCTION_AvxVnniInt16_V512=63, }; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 71e07025dad03c..c09816ca7ad794 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -812,6 +812,10 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_GFNI, LAST_NI_GFNI }, { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, + { NI_Illegal, NI_Illegal }, // AVXVNNIINT8 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT8_V512 + { NI_Illegal, NI_Illegal }, // 
AVXVNNIINT16 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT16_V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, @@ -840,6 +844,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // AVX10v2_X64 { NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64 { NI_Illegal, NI_Illegal }, // GFNI_X64 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT8_X64 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT16_X64 #elif defined (TARGET_ARM64) { FIRST_NI_ArmBase, LAST_NI_ArmBase }, { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 92a1cb5bf8f9ae..6952521e39b356 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -412,6 +412,8 @@ RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 1) // Allows AVX10v2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT8, "EnableAVXVNNIINT8", 1) // Allows AVXVNNIINT8+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT16, "EnableAVXVNNIINT16", 1) // Allows AVXVNNIINT16+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableBMI1, "EnableBMI1", 1) // Allows BMI1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2", 1) // Allows BMI2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows FMA+ hardware intrinsics to be disabled diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index f552e32731ace1..f54a075816cda4 100644 --- 
a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -85,6 +85,8 @@ private static class XArchIntrinsicConstants public const int Vpclmulqdq = 0x200000; public const int Avx10v2 = 0x400000; public const int Gfni = 0x800000; + public const int AvxVnniInt8 = 0x1000000; + public const int AvxVnniInt16 = 0x2000000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -154,6 +156,14 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v2"); if (((flags & Avx10v2) != 0) && ((flags & Avx512) != 0)) builder.AddSupportedInstructionSet("avx10v2_v512"); + if ((flags & AvxVnniInt8) != 0) + builder.AddSupportedInstructionSet("avxvnniint8"); + if (((flags & AvxVnniInt8) != 0) && ((flags & Avx512) != 0)) + builder.AddSupportedInstructionSet("avxvnniint8_v512"); + if ((flags & AvxVnniInt16) != 0) + builder.AddSupportedInstructionSet("avxvnniint16"); + if (((flags & AvxVnniInt16) != 0) && ((flags & Avx512) != 0)) + builder.AddSupportedInstructionSet("avxvnniint16_v512"); if ((flags & Gfni) != 0) { builder.AddSupportedInstructionSet("gfni"); @@ -235,6 +245,12 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_GFNI_X64 => Gfni, InstructionSet.X64_GFNI_V256 => (Gfni | Avx), InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), + InstructionSet.X64_AVXVNNIINT8 => AvxVnniInt8, + InstructionSet.X64_AVXVNNIINT8_X64 => AvxVnniInt8, + InstructionSet.X64_AVXVNNIINT8_V512 => (AvxVnniInt8 | Avx512), + InstructionSet.X64_AVXVNNIINT16 => AvxVnniInt16, + InstructionSet.X64_AVXVNNIINT16_X64 => AvxVnniInt16, + InstructionSet.X64_AVXVNNIINT16_V512 => (AvxVnniInt16 | Avx512), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 
de972aced7c565..debc5f1d1f1ff4 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -366,6 +366,13 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, _supportedInstructionSets.Add("vpclmul_v512"); } + if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx10v2_v512"))) + { + // AvxVnniInt8 and AvxVnniInt16 512 bit should also be enabled + _supportedInstructionSets.Add("avxvnniint8_v512"); + _supportedInstructionSets.Add("avxvnniint16_v512"); + } + if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx"))) { // These ISAs should automatically extend to 256-bit if diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index f9e9361fb462c4..83e48ba1856c26 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -233,6 +233,10 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index a2ab0bd6d39a25..3594b28dc1f7ac 100644 --- 
a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -70,6 +70,10 @@ public enum ReadyToRunInstructionSet Zba=57, Zbb=58, Sve2=59, + AvxVnniInt8=60, + AvxVnniInt8_V512=61, + AvxVnniInt16=62, + AvxVnniInt16_V512=63, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 36847ce414ed99..a97be795e55e65 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -145,6 +145,12 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni; case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; + case InstructionSet.X64_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT8_X64: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X64_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X64_AVXVNNIINT16_X64: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X64_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; default: throw new Exception("Unknown instruction set"); } @@ -228,6 +234,12 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_GFNI_X64: return null; case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; + case InstructionSet.X86_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X86_AVXVNNIINT8_X64: return 
null; + case InstructionSet.X86_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X86_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X86_AVXVNNIINT16_X64: return null; + case InstructionSet.X86_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 758d50e1706507..1675542c832df8 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -92,6 +92,10 @@ public enum InstructionSet X64_GFNI = InstructionSet_X64.GFNI, X64_GFNI_V256 = InstructionSet_X64.GFNI_V256, X64_GFNI_V512 = InstructionSet_X64.GFNI_V512, + X64_AVXVNNIINT8 = InstructionSet_X64.AVXVNNIINT8, + X64_AVXVNNIINT8_V512 = InstructionSet_X64.AVXVNNIINT8_V512, + X64_AVXVNNIINT16 = InstructionSet_X64.AVXVNNIINT16, + X64_AVXVNNIINT16_V512 = InstructionSet_X64.AVXVNNIINT16_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -120,6 +124,8 @@ public enum InstructionSet X64_AVX10v2_X64 = InstructionSet_X64.AVX10v2_X64, X64_AVX10v2_V512_X64 = InstructionSet_X64.AVX10v2_V512_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, + X64_AVXVNNIINT8_X64 = InstructionSet_X64.AVXVNNIINT8_X64, + X64_AVXVNNIINT16_X64 = InstructionSet_X64.AVXVNNIINT16_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -166,6 +172,10 @@ public enum InstructionSet X86_GFNI = InstructionSet_X86.GFNI, X86_GFNI_V256 = InstructionSet_X86.GFNI_V256, X86_GFNI_V512 = InstructionSet_X86.GFNI_V512, + X86_AVXVNNIINT8 = InstructionSet_X86.AVXVNNIINT8, + X86_AVXVNNIINT8_V512 = InstructionSet_X86.AVXVNNIINT8_V512, + 
X86_AVXVNNIINT16 = InstructionSet_X86.AVXVNNIINT16, + X86_AVXVNNIINT16_V512 = InstructionSet_X86.AVXVNNIINT16_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -194,6 +204,8 @@ public enum InstructionSet X86_AVX10v2_X64 = InstructionSet_X86.AVX10v2_X64, X86_AVX10v2_V512_X64 = InstructionSet_X86.AVX10v2_V512_X64, X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, + X86_AVXVNNIINT8_X64 = InstructionSet_X86.AVXVNNIINT8_X64, + X86_AVXVNNIINT16_X64 = InstructionSet_X86.AVXVNNIINT16_X64, } public enum InstructionSet_ARM64 { @@ -287,34 +299,40 @@ public enum InstructionSet_X64 GFNI = 44, GFNI_V256 = 45, GFNI_V512 = 46, - X86Base_X64 = 47, - SSE_X64 = 48, - SSE2_X64 = 49, - SSE3_X64 = 50, - SSSE3_X64 = 51, - SSE41_X64 = 52, - SSE42_X64 = 53, - AVX_X64 = 54, - AVX2_X64 = 55, - AES_X64 = 56, - BMI1_X64 = 57, - BMI2_X64 = 58, - FMA_X64 = 59, - LZCNT_X64 = 60, - PCLMULQDQ_X64 = 61, - POPCNT_X64 = 62, - AVXVNNI_X64 = 63, - X86Serialize_X64 = 64, - AVX512F_X64 = 65, - AVX512BW_X64 = 66, - AVX512CD_X64 = 67, - AVX512DQ_X64 = 68, - AVX512VBMI_X64 = 69, - AVX10v1_X64 = 70, - AVX10v1_V512_X64 = 71, - AVX10v2_X64 = 72, - AVX10v2_V512_X64 = 73, - GFNI_X64 = 74, + AVXVNNIINT8 = 47, + AVXVNNIINT8_V512 = 48, + AVXVNNIINT16 = 49, + AVXVNNIINT16_V512 = 50, + X86Base_X64 = 51, + SSE_X64 = 52, + SSE2_X64 = 53, + SSE3_X64 = 54, + SSSE3_X64 = 55, + SSE41_X64 = 56, + SSE42_X64 = 57, + AVX_X64 = 58, + AVX2_X64 = 59, + AES_X64 = 60, + BMI1_X64 = 61, + BMI2_X64 = 62, + FMA_X64 = 63, + LZCNT_X64 = 64, + PCLMULQDQ_X64 = 65, + POPCNT_X64 = 66, + AVXVNNI_X64 = 67, + X86Serialize_X64 = 68, + AVX512F_X64 = 69, + AVX512BW_X64 = 70, + AVX512CD_X64 = 71, + AVX512DQ_X64 = 72, + AVX512VBMI_X64 = 73, + AVX10v1_X64 = 74, + AVX10v1_V512_X64 = 75, + AVX10v2_X64 = 76, + AVX10v2_V512_X64 = 77, + GFNI_X64 = 78, + AVXVNNIINT8_X64 = 79, + AVXVNNIINT16_X64 = 80, } public enum InstructionSet_X86 @@ -367,34 +385,40 @@ public enum 
InstructionSet_X86 GFNI = 44, GFNI_V256 = 45, GFNI_V512 = 46, - X86Base_X64 = 47, - SSE_X64 = 48, - SSE2_X64 = 49, - SSE3_X64 = 50, - SSSE3_X64 = 51, - SSE41_X64 = 52, - SSE42_X64 = 53, - AVX_X64 = 54, - AVX2_X64 = 55, - AES_X64 = 56, - BMI1_X64 = 57, - BMI2_X64 = 58, - FMA_X64 = 59, - LZCNT_X64 = 60, - PCLMULQDQ_X64 = 61, - POPCNT_X64 = 62, - AVXVNNI_X64 = 63, - X86Serialize_X64 = 64, - AVX512F_X64 = 65, - AVX512BW_X64 = 66, - AVX512CD_X64 = 67, - AVX512DQ_X64 = 68, - AVX512VBMI_X64 = 69, - AVX10v1_X64 = 70, - AVX10v1_V512_X64 = 71, - AVX10v2_X64 = 72, - AVX10v2_V512_X64 = 73, - GFNI_X64 = 74, + AVXVNNIINT8 = 47, + AVXVNNIINT8_V512 = 48, + AVXVNNIINT16 = 49, + AVXVNNIINT16_V512 = 50, + X86Base_X64 = 51, + SSE_X64 = 52, + SSE2_X64 = 53, + SSE3_X64 = 54, + SSSE3_X64 = 55, + SSE41_X64 = 56, + SSE42_X64 = 57, + AVX_X64 = 58, + AVX2_X64 = 59, + AES_X64 = 60, + BMI1_X64 = 61, + BMI2_X64 = 62, + FMA_X64 = 63, + LZCNT_X64 = 64, + PCLMULQDQ_X64 = 65, + POPCNT_X64 = 66, + AVXVNNI_X64 = 67, + X86Serialize_X64 = 68, + AVX512F_X64 = 69, + AVX512BW_X64 = 70, + AVX512CD_X64 = 71, + AVX512DQ_X64 = 72, + AVX512VBMI_X64 = 73, + AVX10v1_X64 = 74, + AVX10v1_V512_X64 = 75, + AVX10v2_X64 = 76, + AVX10v2_V512_X64 = 77, + GFNI_X64 = 78, + AVXVNNIINT8_X64 = 79, + AVXVNNIINT16_X64 = 80, } public unsafe struct InstructionSetFlags : IEnumerable @@ -741,6 +765,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + 
resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -855,6 +887,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) @@ -984,6 +1024,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16); + if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) @@ -1129,6 +1177,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1243,6 +1295,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512)) + 
resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) @@ -1372,6 +1432,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) @@ -1513,6 +1581,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true); + yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT8, true); + yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT8_V512, true); + yield return new InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT16, true); + yield return new 
InstructionSetInfo("avxvnniint16_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT16_V512, true); break; case TargetArchitecture.X86: @@ -1562,6 +1634,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true); + yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT8, true); + yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT8_V512, true); + yield return new InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT16, true); + yield return new InstructionSetInfo("avxvnniint16_v512", "AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT16_V512, true); break; } } @@ -1654,6 +1730,10 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); if (HasInstructionSet(InstructionSet.X64_GFNI)) AddInstructionSet(InstructionSet.X64_GFNI_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); break; case TargetArchitecture.X86: @@ -1711,6 +1791,8 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX10v2_X64); AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); AddInstructionSet(InstructionSet.X64_GFNI_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); break; case TargetArchitecture.X86: @@ -1742,6 +1824,8 @@ public void 
Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX10v2_X64); AddInstructionSet(InstructionSet.X86_AVX10v2_V512_X64); AddInstructionSet(InstructionSet.X86_GFNI_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_X64); break; } } @@ -2092,6 +2176,24 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_GFNI; } + case "AvxVnniInt8": + if (nestedTypeName == "X64") + { return InstructionSet.X64_AVXVNNIINT8_X64; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT8_V512; } + else + { return InstructionSet.X64_AVXVNNIINT8; } + + case "AvxVnniInt16": + if (nestedTypeName == "X64") + { return InstructionSet.X64_AVXVNNIINT16_X64; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT16_V512; } + else + { return InstructionSet.X64_AVXVNNIINT16; } + } break; @@ -2228,6 +2330,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X86_GFNI; } + case "AvxVnniInt8": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT8_V512; } + else + { return InstructionSet.X86_AVXVNNIINT8; } + + case "AvxVnniInt16": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT16_V512; } + else + { return InstructionSet.X86_AVXVNNIINT16; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 53ee7c3f0e8c3e..5b6f962cc71be0 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -73,6 +73,10 @@ instructionset ,X86 ,Avx10v2_V512 , ,52 ,AVX10v2_V512 instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni instructionset ,X86 ,Gfni_V256 , ,54 
,GFNI_V256 ,gfni_v256 instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 +instructionset ,X86 ,AvxVnniInt8 , ,60 ,AVXVNNIINT8 ,avxvnniint8 +instructionset ,X86 ,AvxVnniInt8_V512 , ,61 ,AVXVNNIINT8_V512 ,avxvnniint8_v512 +instructionset ,X86 ,AvxVnniInt16 , ,62 ,AVXVNNIINT16 ,avxvnniint16 +instructionset ,X86 ,AvxVnniInt16_V512 , ,63 ,AVXVNNIINT16_V512 ,avxvnniint16_v512 instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -102,6 +106,8 @@ instructionset64bit,X86 ,AVX10v1_V512 instructionset64bit,X86 ,AVX10v2 instructionset64bit,X86 ,AVX10v2_V512 instructionset64bit,X86 ,GFNI +instructionset64bit,X86 ,AVXVNNIINT8 +instructionset64bit,X86 ,AVXVNNIINT16 vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 @@ -179,6 +185,10 @@ implication ,X86 ,AVX10v1_V512 ,AVX512VBMI implication ,X86 ,AVX10v1_V512 ,AVX512VBMI_VL implication ,X86 ,AVX10v2 ,AVX10v1 implication ,X86 ,AVX10v2_V512 ,AVX10v1_V512 +implication ,X86 ,AVX10v2 ,AVXVNNIINT8 +implication ,X86 ,AVX10v2 ,AVXVNNIINT16 +implication ,X86 ,AVX10v2_V512 ,AVXVNNIINT8_V512 +implication ,X86 ,AVX10v2_V512 ,AVXVNNIINT16_V512 ; These synthetic ISAs need to appear after the core ISAs ; as they depend on the other implications being correct first diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 35d3f0165ab84c..0ae1ca6c186f1b 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1428,6 +1428,24 @@ void EEJitManager::SetCpuInfo() } } + if ((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt8) != 0) + { + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT8)) + { + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT8); + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT8_V512); + } + } + + if ((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt16) != 0) + { + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT16)) + { + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT16); + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT16_V512); + 
} + } + if ((cpuFeatures & XArchIntrinsicConstants_Avx10v2) != 0) { if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d5874922a7bca..db42dfbf8b4344 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -338,6 +338,16 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } + if ((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) // AVX-VNNI-INT8 + { + result |= XArchIntrinsicConstants_AvxVnniInt8; + } + + if ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0) // AVX-VNNI-INT16 + { + result |= XArchIntrinsicConstants_AvxVnniInt16; + } + if (IsApxEnabled() && apxStateSupport()) { if ((cpuidInfo[CPUID_EDX] & (1 << 21)) != 0) // Apx @@ -360,6 +370,8 @@ int minipal_getcpufeatures(void) if (avx10Version >= 2) // Avx10.2 { result |= XArchIntrinsicConstants_Avx10v2; + result |= XArchIntrinsicConstants_AvxVnniInt8; // AvxVnniInt8 + result |= XArchIntrinsicConstants_AvxVnniInt16; // AvxVnniInt16 } // We assume that the Avx10/V512 support can be inferred from diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 9def54fa001063..86044a3f8f02dd 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -35,6 +35,8 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Vpclmulqdq = 0x200000, XArchIntrinsicConstants_Avx10v2 = 0x400000, XArchIntrinsicConstants_Gfni = 0x800000, + XArchIntrinsicConstants_AvxVnniInt8 = 0x01000000, + XArchIntrinsicConstants_AvxVnniInt16 = 0x02000000, }; #endif // HOST_X86 || HOST_AMD64 From 5372b2421601aea2a9234d18c76686e5fb8b8052 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 28 Mar 2025 20:33:13 -0700 Subject: [PATCH 02/32] AVXVNNIINT* API surface and template tests --- src/coreclr/jit/emitxarch.cpp | 22 ++- src/coreclr/jit/emitxarch.h | 2 + src/coreclr/jit/hwintrinsic.cpp | 52 +++++- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 12 ++ 
src/coreclr/jit/hwintrinsiclistxarch.h | 52 ++++++ src/coreclr/jit/hwintrinsicxarch.cpp | 25 +++ src/coreclr/jit/instr.h | 2 +- src/coreclr/jit/instrsxarch.h | 32 ++-- src/coreclr/jit/lowerxarch.cpp | 12 ++ src/coreclr/jit/lsraxarch.cpp | 12 ++ .../System.Private.CoreLib.Shared.projitems | 4 + .../X86/AvxVnniInt16.PlatformNotSupported.cs | 99 ++++++++++ .../Runtime/Intrinsics/X86/AvxVnniInt16.cs | 98 ++++++++++ .../X86/AvxVnniInt8.PlatformNotSupported.cs | 97 ++++++++++ .../Runtime/Intrinsics/X86/AvxVnniInt8.cs | 99 ++++++++++ .../ref/System.Runtime.Intrinsics.cs | 70 +++++++ .../GenerateHWIntrinsicTests_X86.cs | 171 ++++++++++++------ ...rdwareIntrinsics_X86_AvxVnniInt16_r.csproj | 20 ++ ...dwareIntrinsics_X86_AvxVnniInt16_ro.csproj | 20 ++ ...ardwareIntrinsics_X86_AvxVnniInt8_r.csproj | 20 ++ ...rdwareIntrinsics_X86_AvxVnniInt8_ro.csproj | 20 ++ .../Shared/_TernaryOpTestTemplate.template | 10 +- .../AvxVnniInt16/AvxVnniInt16SampleTest.cs | 50 +++++ .../AvxVnniInt16_handwritten_r.csproj | 14 ++ .../AvxVnniInt16_handwritten_ro.csproj | 14 ++ .../AvxVnniInt16/AvxVnniInt16_r.csproj | 14 ++ .../AvxVnniInt16/AvxVnniInt16_ro.csproj | 14 ++ .../AvxVnniInt16/Program.AvxVnniInt16.cs | 16 ++ .../AvxVnniInt16_V512SampleTest.cs | 50 +++++ .../AvxVnniInt16_V512_handwritten_r.csproj | 14 ++ .../AvxVnniInt16_V512_handwritten_ro.csproj | 14 ++ .../AvxVnniInt16_V512_r.csproj | 14 ++ .../AvxVnniInt16_V512_ro.csproj | 14 ++ .../Program.AvxVnniInt16_V512.cs | 16 ++ .../X86_AvxVnniInt16/Directory.Build.props | 10 + .../X86_AvxVnniInt16/Directory.Build.targets | 32 ++++ .../AvxVnniInt8/AvxVnniInt8SampleTest.cs | 50 +++++ .../AvxVnniInt8_handwritten_r.csproj | 14 ++ .../AvxVnniInt8_handwritten_ro.csproj | 14 ++ .../AvxVnniInt8/AvxVnniInt8_r.csproj | 14 ++ .../AvxVnniInt8/AvxVnniInt8_ro.csproj | 14 ++ .../AvxVnniInt8/Program.AvxVnniInt8.cs | 16 ++ .../AvxVnniInt8_V512SampleTest.cs | 50 +++++ .../AvxVnniInt8_V512_handwritten_r.csproj | 14 ++ .../AvxVnniInt8_V512_handwritten_ro.csproj | 14 
++ .../AvxVnniInt8_V512_r.csproj | 14 ++ .../AvxVnniInt8_V512_ro.csproj | 14 ++ .../Program.AvxVnniInt8_V512.cs | 16 ++ .../X86_AvxVnniInt8/Directory.Build.props | 10 + .../X86_AvxVnniInt8/Directory.Build.targets | 32 ++++ 50 files changed, 1442 insertions(+), 81 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs create mode 100644 
src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj create mode 
100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props create mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ea229d069aa05f..1dcde20df47102 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -100,6 +100,16 @@ bool emitter::IsAVXVNNIInstruction(instruction ins) return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION); } +bool emitter::IsAVXVNNIINT8Instruction(instruction ins) +{ + return (ins >= INS_FIRST_AVXVNNIINT8_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT8_INSTRUCTION); +} + +bool emitter::IsAVXVNNIINT16Instruction(instruction ins) +{ + return (ins >= INS_FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT16_INSTRUCTION); +} + bool emitter::IsBMIInstruction(instruction ins) { return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION); @@ -2888,7 +2898,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co if (sizePrefix == 0) { // no simd prefix for EVEX2 - AVX10.2 and above - assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2)); + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); } else if (isPrefix(sizePrefix)) { @@ -4289,7 +4299,7 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) const #if defined(DEBUG) insCode = (insCode >> 16) & 0xFF; - assert((insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3)); + assert((insCode == 0x00) || (insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3)); #endif // DEBUG return true; @@ -9980,7 +9990,7 @@ void emitter::emitIns_SIMD_R_R_R_A(instruction ins, GenTreeIndir* 
indir, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10013,7 +10023,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10044,7 +10054,7 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, regNumber op3Reg, insOpts instOptions) { - if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)) + if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)) { assert(UseSimdEncoding()); @@ -10131,7 +10141,7 @@ void emitter::emitIns_SIMD_R_R_R_S(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 3d4f220a15a635..edc4ac79ae2965 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -124,6 +124,8 @@ static bool IsFMAInstruction(instruction ins); static bool IsPermuteVar2xInstruction(instruction ins); static bool IsKMOVInstruction(instruction ins); static bool 
IsAVXVNNIInstruction(instruction ins); +static bool IsAVXVNNIINT8Instruction(instruction ins); +static bool IsAVXVNNIINT16Instruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); static bool IsKInstructionWithLBit(instruction ins); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index c09816ca7ad794..4c62f2c9955b5a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -812,10 +812,10 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_GFNI, LAST_NI_GFNI }, { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, - { NI_Illegal, NI_Illegal }, // AVXVNNIINT8 - { NI_Illegal, NI_Illegal }, // AVXVNNIINT8_V512 - { NI_Illegal, NI_Illegal }, // AVXVNNIINT16 - { NI_Illegal, NI_Illegal }, // AVXVNNIINT16_V512 + { FIRST_NI_AVXVNNIINT8, LAST_NI_AVXVNNIINT8 }, // AVXVNNIINT8 + { FIRST_NI_AVXVNNIINT8_V512, LAST_NI_AVXVNNIINT8_V512 }, // AVXVNNIINT8_V512 + { FIRST_NI_AVXVNNIINT16, LAST_NI_AVXVNNIINT16 }, // AVXVNNIINT16 + { FIRST_NI_AVXVNNIINT16_V512, LAST_NI_AVXVNNIINT16_V512 }, // AVXVNNIINT16_V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, @@ -2233,6 +2233,50 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { #if defined(TARGET_XARCH) + case NI_AVXVNNIINT8_MultiplyWideningAndAdd: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddByteByte : ((op3Type == TYP_UBYTE) ? 
NI_EVEX_MultiplyWideningAndAddSByteByte : NI_EVEX_MultiplyWideningAndAddSByteSByte); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + + case NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddByteByteSaturate : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByteSaturate : NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + + case NI_AVXVNNIINT16_MultiplyWideningAndAdd: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) ? NI_EVEX_MultiplyWideningAndAddInt16UInt16 : ((op3Type == TYP_USHORT) ? 
NI_EVEX_MultiplyWideningAndAddUInt16UInt16 : NI_EVEX_MultiplyWideningAndAddUInt16Int16); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + + case NI_AVXVNNIINT16_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddSaturate: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) ? NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate : ((op3Type == TYP_USHORT) ? NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate : NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_AVX2_GatherVector128: case NI_AVX2_GatherVector256: assert(varTypeIsSIMD(op2->TypeGet())); diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a4f22c16ec2c6f..36fbd99313e17a 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -844,6 +844,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + case NI_EVEX_MultiplyWideningAndAddSByteSByte: + case NI_EVEX_MultiplyWideningAndAddSByteByte: + case NI_EVEX_MultiplyWideningAndAddByteByte: + case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: + case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16: + case 
NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: { assert(targetReg != REG_NA); assert(op1Reg != REG_NA); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index f8c72b30277d49..8cb84950e22f93 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1477,6 +1477,46 @@ HARDWARE_INTRINSIC(AVX10v2_V512, MinMax, HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) #define LAST_NI_AVX10v2_V512 NI_AVX10v2_V512_MultipleSumAbsoluteDifferences +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNIINT8 Intrinsics +#define FIRST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAdd 
+HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNIINT8_V512 Intrinsics +#define FIRST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAdd, 64, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNIINT16 Intrinsics +#define FIRST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAddSaturate + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNIINT8_V512 Intrinsics +#define FIRST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, 
MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAddSaturate + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} @@ -1712,6 +1752,18 @@ HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 106a0b952fe139..c49d6dcb0c79e7 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -56,6 +56,10 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX10v2_V512_X64; case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; + case InstructionSet_AVXVNNIINT8: + return InstructionSet_AVXVNNIINT8_X64; + case InstructionSet_AVXVNNIINT16: + return InstructionSet_AVXVNNIINT16_X64; case InstructionSet_AES: return InstructionSet_AES_X64; case InstructionSet_BMI1: @@ -151,6 +155,10 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_GFNI_V512; case InstructionSet_PCLMULQDQ: return InstructionSet_PCLMULQDQ_V512; + case InstructionSet_AVXVNNIINT8: + return InstructionSet_AVXVNNIINT8_V512; + case InstructionSet_AVXVNNIINT16: + return InstructionSet_AVXVNNIINT16_V512; default: return InstructionSet_NONE; } @@ -191,6 +199,17 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_AVX10v2; } } + else if (strncmp(className + 3, "VnniInt", 7) == 0) + { + if (strcmp(className + 10, "8") == 0) + { + return InstructionSet_AVXVNNIINT8; + } + else if (strcmp(className + 10, "16") == 0) + { + return InstructionSet_AVXVNNIINT16; + } + } else if (strcmp(className + 3, "2") == 0) { return InstructionSet_AVX2; @@ -932,6 +951,12 @@ bool 
HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX10v2_X64: case InstructionSet_AVX10v2_V512: case InstructionSet_AVX10v2_V512_X64: + case InstructionSet_AVXVNNIINT8: + case InstructionSet_AVXVNNIINT8_X64: + case InstructionSet_AVXVNNIINT8_V512: + case InstructionSet_AVXVNNIINT16: + case InstructionSet_AVXVNNIINT16_X64: + case InstructionSet_AVXVNNIINT16_V512: case InstructionSet_EVEX: case InstructionSet_GFNI: case InstructionSet_GFNI_X64: diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index b8f1adfe762d33..afc6fa9d5309c2 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -98,7 +98,7 @@ enum instruction : uint32_t inline bool IsSimdInstruction(instruction ins) { #if defined(TARGET_XARCH) - return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX10v2_INSTRUCTION); + return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT8_INSTRUCTION); #else return false; #endif // TARGET_XARCH diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 8ce09fa6468934..3612b8ae371f7c 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -923,19 +923,27 @@ INST3(vminmaxpd, "minmaxpd", IUM_WR, BAD_CODE, BAD_ INST3(vminmaxps, "minmaxps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x52), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum packed singles INST3(vmovd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs INST3(vmovw, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | 
INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + +INST3(FIRST_AVXVNNIINT16_INSTRUCTION, "FIRST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, 
BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results 
+INST3(LAST_AVXVNNIINT16_INSTRUCTION, "LAST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + +INST3(FIRST_AVXVNNIINT8_INSTRUCTION, "FIRST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) +INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, 
Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(LAST_AVXVNNIINT8_INSTRUCTION, "LAST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) + + // id nm um mr mi rm tt flags INST3(FIRST_APX_INSTRUCTION, "FIRST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 868718636d3454..e71137060663d9 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10635,6 +10635,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + case NI_EVEX_MultiplyWideningAndAddSByteSByte: + case NI_EVEX_MultiplyWideningAndAddSByteByte: + case NI_EVEX_MultiplyWideningAndAddByteByte: + case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: + case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16: + case NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: { TryMakeSrcContainedOrRegOptional(node, op3); break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 2a7e39be76e9ee..2f9877c2d2272b 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2750,6 +2750,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + case 
NI_EVEX_MultiplyWideningAndAddSByteSByte: + case NI_EVEX_MultiplyWideningAndAddSByteByte: + case NI_EVEX_MultiplyWideningAndAddByteByte: + case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: + case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: + case NI_EVEX_MultiplyWideningAndAddInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16: + case NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: { assert(numArgs == 3); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index d3f62e4c34b7d9..e4aef24bda58c0 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2650,6 +2650,8 @@ + + @@ -2683,6 +2685,8 @@ + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs new file mode 100644 index 00000000000000..0286c3947c27e9 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +namespace System.Runtime.Intrinsics.X86 +{ + /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics.</summary>
+ [CLSCompliant(false)] + public abstract class AvxVnniInt16 : Avx2 + { + internal AvxVnniInt16() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { [Intrinsic] get { return false; } } + + /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary> + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + // VPDPWSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUDS xmm1, xmm2, xmm3/m128 + public static Vector128
MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics.</summary> + [Intrinsic] + public new abstract class V512 + { + internal V512() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+ public static new bool IsSupported { [Intrinsic] get { return false; } } + + // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + } + } +} \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs new file mode 100644 index 00000000000000..8e9aa059a84d2f --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs @@ -0,0 +1,98 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics.</summary> + [Intrinsic] + [CLSCompliant(false)] + public abstract class AvxVnniInt16 : Avx2 + { + internal AvxVnniInt16() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { get => IsSupported; } + + /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary> + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { get => IsSupported; } + } + + // VPDPWSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD ymm1, ymm2, ymm3/m256 +
public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics. + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . 
+ public static bool IsSupported { get => IsSupported; } + + // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs new file mode 100644 index 00000000000000..c4c10259771544 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +namespace System.Runtime.Intrinsics.X86 +{ + /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics.</summary> + [CLSCompliant(false)] + public abstract class AvxVnniInt8 : Avx2 + { + internal AvxVnniInt8() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { [Intrinsic] get { return false; } } + + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + // VPDPBSSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256
right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics.</summary> + public abstract class V512 + { + internal V512() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+ public static bool IsSupported { [Intrinsic] get { return false; } } + + // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + } + } +} \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs new file mode 100644 index 00000000000000..20668158f87dad --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license.
+ +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics.</summary> + [Intrinsic] + [CLSCompliant(false)] + public abstract class AvxVnniInt8 : Avx2 + { + internal AvxVnniInt8() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { get => IsSupported; } + + /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary> + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks> + public static new bool IsSupported { get => IsSupported; } + } + + // VPDPBSSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD ymm1, ymm2, ymm3/m256 +
public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics.</summary> + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary> + /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value> + /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+ public static bool IsSupported { get => IsSupported; } + + // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 02984d153ab9fd..4e90a1151e31f1 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -7277,6 +7277,76 @@ internal X64() { } } } + [System.CLSCompliantAttribute(false)] + public abstract partial class AvxVnniInt8 : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnniInt8() { } + public static new bool IsSupported { get { throw null; } } + public static
System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public
static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend,
System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } + + [System.CLSCompliantAttribute(false)] + public abstract partial class AvxVnniInt16 : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnniInt16() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128
MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left,
System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } + [System.CLSCompliantAttribute(false)] public abstract partial class Avx512BW : System.Runtime.Intrinsics.X86.Avx512F { diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 1138eb6b4cd05a..121ddd80decbf7 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -1866,6 +1866,62 @@ ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx10v2.V512", ["LoadIsa"] = "Avx10v1.V512", ["Method"] = "MinMax", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Imm"] = "15", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "result[0] != -1.0 * (((Math.Abs(left[0]) > Math.Abs(right[0])) ? left[0] : right[0]))", ["ValidateRemainingResults"] = "result[i] != -1.0 * (((Math.Abs(left[i]) > Math.Abs(right[i])) ?
left[i] : right[i]))"}), }; +(string templateFileName, Dictionary templateData)[] AvxVnniInt8Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 
1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + 
(int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = 
"result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != 
Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", 
["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = 
"AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt8_V512Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + 
(int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", 
["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", 
["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt16Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = 
"Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = 
"TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + 
(long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + 
(long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 
1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, 
(long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt16_V512Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { 
["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", 
["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", 
["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), +}; + (string templateFileName, Dictionary templateData)[] Avx512F_ScalarUpperInputs = new [] { ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "GetExponentScalar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "result[0] != Avx512Verify.GetExponent(right[0])", ["ValidateRemainingResults"] = "result[i] != left[i]"}), @@ -3749,59 +3805,63 @@ bool isImmTemplate(string name) string outputDirectory = args[2]; string testListFileName = args[3]; ProcessInputs("Sse1", Sse1Inputs); ProcessInputs("Sse1.X64", Sse1X64Inputs); ProcessInputs("Sse2", Sse2Inputs); ProcessInputs("Sse2.X64", Sse2X64Inputs); ProcessInputs("Sse3", Sse3Inputs); ProcessInputs("Ssse3", Ssse3Inputs); ProcessInputs("Sse41", Sse41Inputs); ProcessInputs("Sse41_Overloaded", Sse41_OverloadedInputs); ProcessInputs("Sse41.X64", Sse41X64Inputs); ProcessInputs("Sse42", Sse42Inputs); ProcessInputs("Avx1", Avx1Inputs); ProcessInputs("Avx1_Vector128", Avx1_Vector128Inputs); ProcessInputs("Avx2", Avx2Inputs);
ProcessInputs("Avx2_Vector128", Avx2_Vector128Inputs); ProcessInputs("Avx512F", Avx512FInputs); ProcessInputs("Avx512F_ScalarUpper", Avx512F_ScalarUpperInputs); ProcessInputs("Avx512F_VL_Vector128", Avx512F_VL_Vector128Inputs); ProcessInputs("Avx512F_VL_Vector256", Avx512F_VL_Vector256Inputs); ProcessInputs("Avx512F_X64", Avx512F_X64Inputs); ProcessInputs("Avx512BW", Avx512BWInputs); ProcessInputs("Avx512BW_VL_Vector128", Avx512BW_VL_Vector128Inputs); ProcessInputs("Avx512BW_VL_Vector256", Avx512BW_VL_Vector256Inputs); ProcessInputs("Avx512CD", Avx512CDInputs); ProcessInputs("Avx512CD_VL_Vector128", Avx512CD_VL_Vector128Inputs); ProcessInputs("Avx512CD_VL_Vector256", Avx512CD_VL_Vector256Inputs); ProcessInputs("Avx512DQ", Avx512DQInputs); ProcessInputs("Avx512DQ_ScalarUpper", Avx512DQ_ScalarUpperInputs); ProcessInputs("Avx512DQ_VL_Vector128", Avx512DQ_VL_Vector128Inputs); ProcessInputs("Avx512DQ_VL_Vector256", Avx512DQ_VL_Vector256Inputs); ProcessInputs("Avx512Vbmi", Avx512VbmiInputs); ProcessInputs("Avx512Vbmi_VL_Vector128", Avx512Vbmi_VL_Vector128Inputs); ProcessInputs("Avx512Vbmi_VL_Vector256", Avx512Vbmi_VL_Vector256Inputs); ProcessInputs("Avx10v1_ScalarUpper", Avx10v1_ScalarUpperInputs); ProcessInputs("Avx10v1_Vector128", Avx10v1_Vector128Inputs); ProcessInputs("Avx10v1_Vector256", Avx10v1_Vector256Inputs); ProcessInputs("Avx10v1_V512", Avx10v1_V512Inputs); ProcessInputs("Fma_Vector128", Fma_Vector128Inputs); ProcessInputs("Fma_Vector256", Fma_Vector256Inputs); ProcessInputs("Bmi1", Bmi1Inputs); ProcessInputs("Bmi1.X64", Bmi1X64Inputs); ProcessInputs("Aes", AesInputs); ProcessInputs("Pclmulqdq", PclmulqdqInputs); ProcessInputs("Pclmulqdq.V256", PclmulqdqV256Inputs); ProcessInputs("Pclmulqdq.V512", PclmulqdqV512Inputs); ProcessInputs("Bmi2", Bmi2Inputs); ProcessInputs("Bmi2.X64", Bmi2X64Inputs); ProcessInputs("X86Base", X86BaseInputs); ProcessInputs("X86Base.X64", X86BaseX64Inputs); ProcessInputs("Gfni", GfniInputs); ProcessInputs("Gfni.V256", GfniV256Inputs); ProcessInputs("Gfni.V512", GfniV512Inputs); ProcessInputs("Avx10v2", Avx10v2Inputs); ProcessInputs("Avx10v2_V512", Avx10v2_V512Inputs); +ProcessInputs("AvxVnniInt8", AvxVnniInt8Inputs); +ProcessInputs("AvxVnniInt8_V512", AvxVnniInt8_V512Inputs); +ProcessInputs("AvxVnniInt16", AvxVnniInt16Inputs); +ProcessInputs("AvxVnniInt16_V512", AvxVnniInt16_V512Inputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs)
@@ -3835,6 +3895,11 @@ void ProcessInput(StreamWriter testListFile, string groupName, (string templateF var suffix = ""; + if (input.templateFileName == "SimpleTernOpTest.template") + { /* Several ternary entries share the same Method and differ only in operand types; appending all three operand types keeps their generated test names distinct. */ + testName += $"{input.templateData["Op1VectorType"]}.{input.templateData["Op1BaseType"]}.{input.templateData["Op2VectorType"]}.{input.templateData["Op2BaseType"]}.{input.templateData["Op3VectorType"]}.{input.templateData["Op3BaseType"]}"; + } + if (input.templateFileName == "SimpleUnOpConvTest.template" || input.templateFileName == "SimdScalarUnOpConvTest.template" ) { testName = 
$"{input.templateData["Method"]}.{input.templateData["Op1VectorType"]}{input.templateData["Op1BaseType"]}"; diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj new file mode 100644 index 00000000000000..f22223d3c45b2e --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj @@ -0,0 +1,20 @@ + + + 2 + + true + true + + + + true + true + true + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj new file mode 100644 index 00000000000000..b1dee63473dc31 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj @@ -0,0 +1,20 @@ + + + 2 + + true + true + + + + true + true + true + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj new file mode 100644 index 00000000000000..5f1151b24b7607 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj @@ -0,0 +1,20 @@ + + + 2 + + true + true + + + + true + true + true + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj new file mode 100644 index 00000000000000..612599305d9b32 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj @@ -0,0 +1,20 @@ + + + 2 + + true + true + + + + true + true + true + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template index cd9caf6b7072b5..1b22d0b096fd40 100644 --- 
a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template @@ -20,9 +20,9 @@ namespace JIT.HardwareIntrinsics.X86 public static partial class Program { [Fact] - public static void {Method}{RetBaseType}() + public static void {Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}() { - var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}(); + var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}(); if (test.IsSupported) { @@ -78,7 +78,7 @@ namespace JIT.HardwareIntrinsics.X86 } } - public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType} + public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType} { private struct TestStruct { @@ -100,7 +100,7 @@ namespace JIT.HardwareIntrinsics.X86 return testStruct; } - public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType} testClass) + public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType} testClass) { var result = {Isa}.{Method}(_fld1, _fld2, _fld3); @@ -126,7 +126,7 @@ namespace JIT.HardwareIntrinsics.X86 private SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}> _dataTable; - public {TemplateName}TernaryOpTest__{Method}{RetBaseType}() + public {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}() { Succeeded = true; diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs 
b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs new file mode 100644 index 00000000000000..f79ba7132aee3a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt16 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt16SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt16.IsSupported) + { + Console.WriteLine("AvxVnniInt16 supported"); + } + else { + Console.WriteLine("AvxVnniInt16 not supported"); + } + if (AvxVnniInt16.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt16_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt16_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj new file mode 100644 index 00000000000000..b0e0c15535b643 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_handwritten_r + true + + + Embedded + + + + + + + diff --git 
a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj new file mode 100644 index 00000000000000..37af53d8b83004 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj new file mode 100644 index 00000000000000..a2dd5040f106b9 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj new file mode 100644 index 00000000000000..47301f1344bf55 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs new file mode 100644 index 00000000000000..7c9a1f9d2418d6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs new file mode 100644 index 00000000000000..a3faf347bcb7b8 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt16_V512 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt16_V512SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt16.IsSupported) + { + Console.WriteLine("AvxVnniInt16 supported"); + } + else { + Console.WriteLine("AvxVnniInt16 not supported"); + } + if (AvxVnniInt16.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt16_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt16_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj 
b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj new file mode 100644 index 00000000000000..913145750c6879 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj new file mode 100644 index 00000000000000..d301293f0763ea --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj new file mode 100644 index 00000000000000..05c40b2a2ec0a2 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj new file mode 100644 index 00000000000000..4b159ee2cdd3f0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs 
b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs new file mode 100644 index 00000000000000..52895c8b02fdf5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16_V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props new file mode 100644 index 00000000000000..e3e1bac79c32c5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props @@ -0,0 +1,10 @@ + + + + + + + true + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets new file mode 100644 index 00000000000000..5b046968c0461a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets @@ -0,0 +1,32 @@ + + + + + + + $(IntermediateOutputPath)$(MSBuildProjectName)/gen/ + $(GeneratedHWIntrinsicTestDirectory)GeneratedHWIntrinsicTestList.txt + + + + + + + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs new file mode 100644 index 00000000000000..18a547f3046dca --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt8 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt8SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt8.IsSupported) + { + Console.WriteLine("AvxVnniInt8 supported"); + } + else { + Console.WriteLine("AvxVnniInt8 not supported"); + } + if (AvxVnniInt8.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt8_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt8_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj new file mode 100644 index 00000000000000..f5a1b8ec79165a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj new file mode 100644 index 00000000000000..19edfb7a1e4a3e --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_handwritten_ro + true + + + Embedded + True + + + + + + diff 
--git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj new file mode 100644 index 00000000000000..4860476c221996 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj new file mode 100644 index 00000000000000..85c963490ba6e8 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs new file mode 100644 index 00000000000000..e20f252e9e9cb4 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs new file mode 100644 index 00000000000000..ce0f11ec035755 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt8_V512 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt8_V512SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt8.IsSupported) + { + Console.WriteLine("AvxVnniInt8 supported"); + } + else { + Console.WriteLine("AvxVnniInt8 not supported"); + } + if (AvxVnniInt8.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt8_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt8_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj new file mode 100644 index 00000000000000..9dad95ac6905a3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj new file mode 100644 index 00000000000000..1a9d7fa07349a6 --- /dev/null +++ 
b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj new file mode 100644 index 00000000000000..bf04181f2eed27 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj new file mode 100644 index 00000000000000..44720c47ad4db3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs new file mode 100644 index 00000000000000..f1910642eed9ec --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8_V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props new file mode 100644 index 00000000000000..e3e1bac79c32c5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props @@ -0,0 +1,10 @@ + + + + + + + true + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets new file mode 100644 index 00000000000000..5b046968c0461a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets @@ -0,0 +1,32 @@ + + + + + + + $(IntermediateOutputPath)$(MSBuildProjectName)/gen/ + $(GeneratedHWIntrinsicTestDirectory)GeneratedHWIntrinsicTestList.txt + + + + + + + + + + + + + + + + + From 3fcd67417029509a4ba01669b60f4176b466cfeb Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 14 Apr 2025 14:35:22 -0700 Subject: [PATCH 03/32] Run formatting --- src/coreclr/jit/emitxarch.cpp | 16 ++++++---- src/coreclr/jit/hwintrinsic.cpp | 44 ++++++++++++++++++++-------- src/coreclr/jit/hwintrinsicxarch.cpp | 2 +- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1dcde20df47102..685a9be01acbdf 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2898,7 +2898,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co if (sizePrefix == 0) { // no simd prefix for EVEX2 - AVX10.2 and above - assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); + 
assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); } else if (isPrefix(sizePrefix)) { @@ -9990,7 +9992,8 @@ void emitter::emitIns_SIMD_R_R_R_A(instruction ins, GenTreeIndir* indir, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || + IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10023,7 +10026,8 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || + IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10054,7 +10058,8 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, regNumber op3Reg, insOpts instOptions) { - if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)) + if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || + IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)) { assert(UseSimdEncoding()); @@ -10141,7 +10146,8 @@ void emitter::emitIns_SIMD_R_R_R_S(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + 
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || + IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 4c62f2c9955b5a..9cf0b801c22854 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2238,9 +2238,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddByteByte : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByte : NI_EVEX_MultiplyWideningAndAddSByteSByte); - retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || + (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) + ? NI_EVEX_MultiplyWideningAndAddByteByte + : ((op3Type == TYP_UBYTE) ? 
NI_EVEX_MultiplyWideningAndAddSByteByte + : NI_EVEX_MultiplyWideningAndAddSByteSByte); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } @@ -2249,9 +2254,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddByteByteSaturate : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByteSaturate : NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate); - retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || + (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) + ? NI_EVEX_MultiplyWideningAndAddByteByteSaturate + : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByteSaturate + : NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } @@ -2260,9 +2270,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) ? NI_EVEX_MultiplyWideningAndAddInt16UInt16 : ((op3Type == TYP_USHORT) ? 
NI_EVEX_MultiplyWideningAndAddUInt16UInt16 : NI_EVEX_MultiplyWideningAndAddUInt16Int16); - retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || + (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) + ? NI_EVEX_MultiplyWideningAndAddInt16UInt16 + : ((op3Type == TYP_USHORT) ? NI_EVEX_MultiplyWideningAndAddUInt16UInt16 + : NI_EVEX_MultiplyWideningAndAddUInt16Int16); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } @@ -2271,9 +2286,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) ? NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate : ((op3Type == TYP_USHORT) ? NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate : NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate); - retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || + (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) + ? NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate + : ((op3Type == TYP_USHORT) ? 
NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate + : NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index c49d6dcb0c79e7..8236962115a136 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -59,7 +59,7 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) case InstructionSet_AVXVNNIINT8: return InstructionSet_AVXVNNIINT8_X64; case InstructionSet_AVXVNNIINT16: - return InstructionSet_AVXVNNIINT16_X64; + return InstructionSet_AVXVNNIINT16_X64; case InstructionSet_AES: return InstructionSet_AES_X64; case InstructionSet_BMI1: From f07b32630ba2e19780439f7eb4866d0ebfd26b90 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 15 Apr 2025 14:36:34 -0700 Subject: [PATCH 04/32] Remove new keyword where not required --- .../Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs | 6 +++--- .../Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs index 0286c3947c27e9..c91bbe0f481dc6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs @@ -68,14 +68,14 @@ internal X64() { } /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics. [Intrinsic] - public new abstract class V512 + public abstract class V512 { internal V512() { } /// Gets a value that indicates whether the APIs in this class are supported. 
/// if the APIs are supported; otherwise, . /// A value of indicates that the APIs will throw . - public static new bool IsSupported { [Intrinsic] get { return false; } } + public static bool IsSupported { [Intrinsic] get { return false; } } // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } @@ -96,4 +96,4 @@ internal V512() { } public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } } } -} \ No newline at end of file +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs index c4c10259771544..19f79db01415c2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs @@ -66,14 +66,14 @@ internal X64() { } public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics. - public new abstract class V512 + public abstract class V512 { internal V512() { } /// Gets a value that indicates whether the APIs in this class are supported. /// if the APIs are supported; otherwise, . /// A value of indicates that the APIs will throw . 
- public static new bool IsSupported { [Intrinsic] get { return false; } } + public static bool IsSupported { [Intrinsic] get { return false; } } // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } @@ -94,4 +94,4 @@ internal V512() { } public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } } } -} \ No newline at end of file +} From 14e224b6c1ade713ca4b4cf8ab5085210220a0c5 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 17 Apr 2025 10:52:35 -0700 Subject: [PATCH 05/32] Move AvxVnniInt* with other Vex instruction sets --- src/coreclr/tools/Common/InstructionSetHelpers.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 83e48ba1856c26..415f0ce78be2a1 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -206,6 +206,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2)) { optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16"); } optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma"); @@ -233,9 +235,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2_v512"); - 
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8_v512"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } From 64e1f7dab89b82132db96a7fc884611cb4501a88 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 17 Apr 2025 12:04:18 -0700 Subject: [PATCH 06/32] Add smoke test for Avx10.2 and add AvxvnniInt* Isas to those tests --- .../SmokeTests/HardwareIntrinsics/Program.cs | 72 +++++++++++++++++++ .../HardwareIntrinsics/X64Avx10v2.csproj | 33 +++++++++ 2 files changed, 105 insertions(+) create mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 92e8f656f681af..1b8081eb4d44e1 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -63,8 +63,12 @@ static int Main() bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt8 = false; + bool? ExpectedAvxVnniInt16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; + bool? ExpectedAvx10v2V512 = false; bool? ExpectedAvx512F = false; bool? ExpectedAvx512BW = false; bool? ExpectedAvx512CD = false; @@ -91,8 +95,12 @@ static int Main() bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt8 = false; + bool? ExpectedAvxVnniInt16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; + bool? ExpectedAvx10v2V512 = false; bool? 
ExpectedAvx512F = false; bool? ExpectedAvx512BW = false; bool? ExpectedAvx512CD = false; @@ -119,8 +127,12 @@ static int Main() bool? ExpectedBmi1 = null; bool? ExpectedBmi2 = null; bool? ExpectedAvxVnni = false; // TODO: Fix once opportunistic Avx2 is allowed + bool? ExpectedAvxVnniInt8 = false; // TODO: Fix once opportunistic Avx2 is allowed + bool? ExpectedAvxVnniInt16 = false; // TODO: Fix once opportunistic Avx2 is allowed bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; + bool? ExpectedAvx10v2V512 = false; bool? ExpectedAvx512F = false; bool? ExpectedAvx512BW = false; bool? ExpectedAvx512CD = false; @@ -147,8 +159,12 @@ static int Main() bool? ExpectedBmi1 = null; bool? ExpectedBmi2 = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt8 = null; + bool? ExpectedAvxVnniInt16 = null; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; + bool? ExpectedAvx10v2V512 = false; bool? ExpectedAvx512F = false; bool? ExpectedAvx512BW = false; bool? ExpectedAvx512CD = false; @@ -175,8 +191,12 @@ static int Main() bool? ExpectedBmi1 = null; bool? ExpectedBmi2 = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt8 = null; + bool? ExpectedAvxVnniInt16 = null; bool? ExpectedAvx10v1 = null; bool? ExpectedAvx10v1V512 = null; + bool? ExpectedAvx10v2 = null; + bool? ExpectedAvx10v2V512 = null; bool? ExpectedAvx512F = true; bool? ExpectedAvx512BW = true; bool? ExpectedAvx512CD = true; @@ -186,6 +206,38 @@ static int Main() bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedGfniV512 = null; +#elif AVX10v2_INTRINSICS + bool? ExpectedSse3 = true; + bool? ExpectedSsse3 = true; + bool? ExpectedAes = null; + bool? ExpectedLzcnt = null; + bool? ExpectedPclmulqdq = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedSse41 = true; + bool? ExpectedSse42 = true; + bool? ExpectedPopcnt = null; + bool? 
ExpectedAvx = true; + bool? ExpectedAvx2 = true; + bool? ExpectedFma = true; + bool? ExpectedBmi1 = null; + bool? ExpectedBmi2 = null; + bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt8 = true; + bool? ExpectedAvxVnniInt16 = true; + bool? ExpectedAvx10v1 = true; + bool? ExpectedAvx10v1V512 = true; + bool? ExpectedAvx10v2 = true; + bool? ExpectedAvx10v2V512 = true; + bool? ExpectedAvx512F = true; + bool? ExpectedAvx512BW = true; + bool? ExpectedAvx512CD = true; + bool? ExpectedAvx512DQ = true; + bool? ExpectedAvx512Vbmi = true; + bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = null; #else #error Who dis? #endif @@ -260,12 +312,24 @@ static int Main() Check("AvxVnni", ExpectedAvxVnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); Check("AvxVnni.X64", ExpectedAvxVnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null); + Check("AvxVnniInt8", ExpectedAvxVnniInt8, &AvxVnniInt8IsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("AvxVnniInt8.X64", ExpectedAvxVnniInt8, &AvxVnniInt8X64IsSupported, AvxVnniInt8.X64.IsSupported, null); + + Check("AvxVnniInt16", ExpectedAvxVnniInt16, &AvxVnniInt16IsSupported, AvxVnniInt16.IsSupported, () => AvxVnniInt16.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("AvxVnniInt16.X64", ExpectedAvxVnniInt16, &AvxVnniInt16X64IsSupported, AvxVnniInt16.X64.IsSupported, null); + Check("Avx10v1", ExpectedAvx10v1, &Avx10v1IsSupported, Avx10v1.IsSupported, () => Avx10v1.Abs(Vector128.Zero).Equals(Vector128.Zero)); Check("Avx10v1.X64", ExpectedAvx10v1, &Avx10v1X64IsSupported, Avx10v1.X64.IsSupported, null); Check("Avx10v1.V512", ExpectedAvx10v1V512, &Avx10v1V512IsSupported, Avx10v1.V512.IsSupported, 
() => Avx10v1.V512.Abs(Vector512.Zero).Equals(Vector512.Zero)); Check("Avx10v1.V512.X64", ExpectedAvx10v1V512, &Avx10v1V512X64IsSupported, Avx10v1.V512.X64.IsSupported, null); + Check("Avx10v2", ExpectedAvx10v2, &Avx10v2IsSupported, Avx10v2.IsSupported, () => Avx10v2.MinMax(Vector128.Zero, Vector128.Zero, 0x00).Equals(Vector128.Zero)); + Check("Avx10v2.X64", ExpectedAvx10v2, &Avx10v2X64IsSupported, Avx10v2.X64.IsSupported, null); + + Check("Avx10v2.V512", ExpectedAvx10v2V512, &Avx10v2V512IsSupported, Avx10v2.V512.IsSupported, () => Avx10v2.V512.MinMax(Vector512.Zero, Vector512.Zero, 0x00).Equals(Vector512.Zero)); + Check("Avx10v2.V512.X64", ExpectedAvx10v2V512, &Avx10v2V512X64IsSupported, Avx10v2.V512.X64.IsSupported, null); + Check("Avx512F", ExpectedAvx512F, &Avx512FIsSupported, Avx512F.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); Check("Avx512F.VL", ExpectedAvx512F, &Avx512FVLIsSupported, Avx512F.VL.IsSupported, null); Check("Avx512F.X64", ExpectedAvx512F, &Avx512FX64IsSupported, Avx512F.X64.IsSupported, null); @@ -334,10 +398,18 @@ static int Main() static bool PopcntX64IsSupported() => Popcnt.X64.IsSupported; static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; + static bool AvxVnniInt8IsSupported() => AvxVnniInt8.IsSupported; + static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; + static bool AvxVnniInt16IsSupported() => AvxVnniInt16.IsSupported; + static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; static bool Avx10v1IsSupported() => Avx10v1.IsSupported; static bool Avx10v1X64IsSupported() => Avx10v1.X64.IsSupported; static bool Avx10v1V512IsSupported() => Avx10v1.V512.IsSupported; static bool Avx10v1V512X64IsSupported() => Avx10v1.V512.X64.IsSupported; + static bool Avx10v2IsSupported() => Avx10v2.IsSupported; + static bool Avx10v2X64IsSupported() => Avx10v2.X64.IsSupported; + static bool Avx10v2V512IsSupported() => 
Avx10v2.V512.IsSupported; + static bool Avx10v2V512X64IsSupported() => Avx10v2.V512.X64.IsSupported; static bool Avx512FIsSupported() => Avx512F.IsSupported; static bool Avx512FVLIsSupported() => Avx512F.VL.IsSupported; static bool Avx512FX64IsSupported() => Avx512F.X64.IsSupported; diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj new file mode 100644 index 00000000000000..e2bf8afb23c146 --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj @@ -0,0 +1,33 @@ + + + Exe + 0 + true + + true + + true + true + $(DefineConstants);AVX10v2_INTRINSICS;VECTORT512_INTRINSICS + true + false + + + + + + + + /dev/null | grep -q __AVX10v2__; then + echo No support for AVX10v2, test not applicable. + exit 0 + fi +]]> + + + + + + From 93f2eb5e54efe9e8f85ef0ab66971c39191ebbf9 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 17 Apr 2025 14:13:02 -0700 Subject: [PATCH 07/32] Correct smoke tests for avx2 --- src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 1b8081eb4d44e1..899eb0470ab8d1 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -159,8 +159,8 @@ static int Main() bool? ExpectedBmi1 = null; bool? ExpectedBmi2 = null; bool? ExpectedAvxVnni = null; - bool? ExpectedAvxVnniInt8 = null; - bool? ExpectedAvxVnniInt16 = null; + bool? ExpectedAvxVnniInt8 = false; + bool? ExpectedAvxVnniInt16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; bool? 
ExpectedAvx10v2 = false; From 4fb7b25bd3c630f9011e9075d88444990514e602 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 18 Apr 2025 11:36:59 -0700 Subject: [PATCH 08/32] Add AvxVnniInt* implications --- src/coreclr/inc/corinfoinstructionset.h | 8 ++++++++ .../Common/JitInterface/CorInfoInstructionSet.cs | 16 ++++++++++++++++ .../ThunkGenerator/InstructionSetDesc.txt | 2 ++ 3 files changed, 26 insertions(+) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index a22dcd30f9d459..d6824fc388bfa7 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -677,6 +677,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) @@ -813,6 +817,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if 
(resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 1675542c832df8..1c3396510507b2 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -847,6 +847,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) @@ -984,6 +988,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) @@ -1255,6 +1263,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41)) @@ -1392,6 +1404,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41)) diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt 
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 5b6f962cc71be0..83dc7b50721dfe 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -164,6 +164,8 @@ implication ,X86 ,PCLMULQDQ_V256 ,AVX implication ,X86 ,PCLMULQDQ_V512 ,PCLMULQDQ_V256 implication ,X86 ,PCLMULQDQ_V512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 +implication ,X86 ,AVXVNNIINT8 ,AVX2 +implication ,X86 ,AVXVNNIINT16 ,AVX2 implication ,X86 ,X86Serialize ,X86Base implication ,X86 ,GFNI ,SSE41 implication ,X86 ,GFNI_V256 ,GFNI From 23266633a1b6e6610f595551ee8cf81a21da533e Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 18 Apr 2025 11:38:51 -0700 Subject: [PATCH 09/32] correct smoke test for AVX2 --- src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 899eb0470ab8d1..1b8081eb4d44e1 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -159,8 +159,8 @@ static int Main() bool? ExpectedBmi1 = null; bool? ExpectedBmi2 = null; bool? ExpectedAvxVnni = null; - bool? ExpectedAvxVnniInt8 = false; - bool? ExpectedAvxVnniInt16 = false; + bool? ExpectedAvxVnniInt8 = null; + bool? ExpectedAvxVnniInt16 = null; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; bool? 
ExpectedAvx10v2 = false; From 73f4fbe696f53b5a3bbebfd291715985d55683b8 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 18 Apr 2025 15:11:10 -0700 Subject: [PATCH 10/32] Enable Vex encoding of AvxVnniInt* instructions when Avx10.2 is not available --- src/coreclr/jit/emitxarch.cpp | 18 ++++++++++++++++++ src/coreclr/jit/instrsxarch.h | 24 ++++++++++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 685a9be01acbdf..d90e71762804ad 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -327,6 +327,24 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256); } + case INS_vpdpwsud: + case INS_vpdpwsuds: + case INS_vpdpwusd: + case INS_vpdpwusds: + case INS_vpdpwuud: + case INS_vpdpwuuds: + case INS_vpdpbssd: + case INS_vpdpbssds: + case INS_vpdpbsud: + case INS_vpdpbsuds: + case INS_vpdpbuud: + case INS_vpdpbuuds: + { + // Evex versions of AvxVnniInt8 and AvxVnniInt16 will be supported + // with Avx10.2 ISA. 
+ return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2); + } + default: { return HasEvexEncoding(ins); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 3612b8ae371f7c..87a79b4e6d9dbe 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -926,21 +926,21 @@ INST3(vmovw, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_ INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVXVNNIINT16_INSTRUCTION, "FIRST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, 
PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, 
PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results INST3(LAST_AVXVNNIINT16_INSTRUCTION, "LAST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVXVNNIINT8_INSTRUCTION, "FIRST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX 
| INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | 
Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results INST3(LAST_AVXVNNIINT8_INSTRUCTION, "LAST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) From 7056dba57e11a5a3bb45972c851a425081e7f6aa Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 28 Apr 2025 22:00:16 -0700 Subject: [PATCH 11/32] Avx10.2 will support VEX versions of AvxVnniInt* --- src/coreclr/inc/corinfoinstructionset.h | 16 +-- src/coreclr/jit/emitxarch.cpp | 39 ++++-- src/coreclr/jit/hwintrinsic.cpp | 80 +++++++++-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 36 +++-- src/coreclr/jit/hwintrinsiclistxarch.h | 40 ++++-- src/coreclr/jit/lowerxarch.cpp | 36 +++-- src/coreclr/jit/lsraxarch.cpp | 36 +++-- .../Compiler/HardwareIntrinsicHelpers.cs | 4 +- .../JitInterface/CorInfoInstructionSet.cs | 32 ++--- .../ThunkGenerator/InstructionSetDesc.txt | 4 +- src/native/minipal/cpufeatures.c | 2 - .../GenerateHWIntrinsicTests_X86.cs | 130 +++++++++--------- .../AvxVnniInt8/AvxVnniInt8SampleTest.cs | 10 ++ 13 files changed, 294 insertions(+), 171 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h 
b/src/coreclr/inc/corinfoinstructionset.h index d6824fc388bfa7..d6ce0c32117bc4 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -725,10 +725,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -865,10 +865,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if 
(resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index d90e71762804ad..732eef69355e5f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -3129,7 +3129,14 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con // check for a prefix in the 11 position BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + if (sizePrefix == 0) + { + // no simd prefix for Avx-Vnni-Int* ISAs subset of instructions + // INS_vpdpbuud[,s], INS_vpdpwuud[,s] + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); + } + else if (isPrefix(sizePrefix)) { // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits // @@ -3199,23 +3206,27 @@ emitter::code_t 
emitter::emitExtractVexPrefix(instruction ins, code_t& code) con unreached(); } } + } + else + { + unreached(); + } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; - if ((check == 0x3A) || (check == 0x38)) - { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; - } + if ((check == 0x3A) || (check == 0x38)) + { + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; } } else diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 9cf0b801c22854..50f74255bddfc6 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2234,6 +2234,20 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { #if defined(TARGET_XARCH) case NI_AVXVNNIINT8_MultiplyWideningAndAdd: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || + (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) + ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte + : ((op3Type == TYP_UBYTE) ? 
NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte + : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); @@ -2241,15 +2255,29 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); intrinsic = (op2Type == TYP_UBYTE) - ? NI_EVEX_MultiplyWideningAndAddByteByte - : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByte - : NI_EVEX_MultiplyWideningAndAddSByteSByte); + ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte + : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte + : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } case NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || + (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); + intrinsic = (op2Type == TYP_UBYTE) + ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate + : ((op3Type == TYP_UBYTE) ? 
NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate + : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); @@ -2257,15 +2285,29 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); intrinsic = (op2Type == TYP_UBYTE) - ? NI_EVEX_MultiplyWideningAndAddByteByteSaturate - : ((op3Type == TYP_UBYTE) ? NI_EVEX_MultiplyWideningAndAddSByteByteSaturate - : NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate); + ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate + : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate + : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } case NI_AVXVNNIINT16_MultiplyWideningAndAdd: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || + (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) + ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16 + : ((op3Type == TYP_USHORT) ? 
NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16 + : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); @@ -2273,15 +2315,29 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); intrinsic = (op2Type == TYP_SHORT) - ? NI_EVEX_MultiplyWideningAndAddInt16UInt16 - : ((op3Type == TYP_USHORT) ? NI_EVEX_MultiplyWideningAndAddUInt16UInt16 - : NI_EVEX_MultiplyWideningAndAddUInt16Int16); + ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16 + : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16 + : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } case NI_AVXVNNIINT16_MultiplyWideningAndAddSaturate: + { + var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); + var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); + assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || + (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); + intrinsic = (op2Type == TYP_SHORT) + ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate + : ((op3Type == TYP_USHORT) ? 
NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate + : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddSaturate: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); @@ -2289,9 +2345,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); intrinsic = (op2Type == TYP_SHORT) - ? NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate - : ((op3Type == TYP_USHORT) ? NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate - : NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate); + ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate + : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate + : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 36fbd99313e17a..620bb01ab5f437 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -844,18 +844,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteSByte: - case NI_EVEX_MultiplyWideningAndAddSByteByte: - case NI_EVEX_MultiplyWideningAndAddByteByte: - case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16: - case 
NI_EVEX_MultiplyWideningAndAddUInt16UInt16: - case NI_EVEX_MultiplyWideningAndAddUInt16Int16: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: { assert(targetReg != REG_NA); assert(op1Reg != REG_NA); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 8cb84950e22f93..f37a28cbf01a38 100644 --- 
a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1484,6 +1484,12 @@ HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, // AVXVNNIINT8 Intrinsics #define FIRST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAdd HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) #define LAST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate @@ -1494,6 +1500,12 @@ HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, // AVXVNNIINT8_V512 Intrinsics #define FIRST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAdd, 64, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByte, 64, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByteSaturate, 64, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByte, 64, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByteSaturate, 64, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByte, 64, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByteSaturate, 64, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) #define LAST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate @@ -1504,8 +1516,14 @@ HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, // AVXVNNIINT16 Intrinsics #define FIRST_NI_AVXVNNIINT16 
NI_AVXVNNIINT16_MultiplyWideningAndAdd HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAddSaturate +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -1514,8 +1532,14 @@ HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, // AVXVNNIINT8_V512 Intrinsics #define FIRST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -#define LAST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAddSaturate +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, 
MultiplyWideningAndAddUInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +#define LAST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -1752,18 +1776,6 @@ HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(EVEX, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e71137060663d9..cce99f0c6dedd1 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10635,18 +10635,30 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteSByte: - case NI_EVEX_MultiplyWideningAndAddSByteByte: - case NI_EVEX_MultiplyWideningAndAddByteByte: - case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16: - case NI_EVEX_MultiplyWideningAndAddUInt16UInt16: - case NI_EVEX_MultiplyWideningAndAddUInt16Int16: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: + case 
NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: { TryMakeSrcContainedOrRegOptional(node, op3); break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 2f9877c2d2272b..0ab21b8faa5aac 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2750,18 +2750,30 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteSByte: - case NI_EVEX_MultiplyWideningAndAddSByteByte: - case NI_EVEX_MultiplyWideningAndAddByteByte: - case NI_EVEX_MultiplyWideningAndAddSByteSByteSaturate: - case NI_EVEX_MultiplyWideningAndAddSByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddByteByteSaturate: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16: - case NI_EVEX_MultiplyWideningAndAddUInt16UInt16: - case NI_EVEX_MultiplyWideningAndAddUInt16Int16: - case NI_EVEX_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_EVEX_MultiplyWideningAndAddUInt16UInt16Saturate: - case 
NI_EVEX_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: { assert(numArgs == 3); diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index f54a075816cda4..f7a27d312424ce 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -247,10 +247,10 @@ public static int FromInstructionSet(InstructionSet instructionSet) 
InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), InstructionSet.X64_AVXVNNIINT8 => AvxVnniInt8, InstructionSet.X64_AVXVNNIINT8_X64 => AvxVnniInt8, - InstructionSet.X64_AVXVNNIINT8_V512 => (AvxVnniInt8 | Avx512), + InstructionSet.X64_AVXVNNIINT8_V512 => (AvxVnniInt8 | Avx10v2), InstructionSet.X64_AVXVNNIINT16 => AvxVnniInt16, InstructionSet.X64_AVXVNNIINT16_X64 => AvxVnniInt16, - InstructionSet.X64_AVXVNNIINT16_V512 => (AvxVnniInt16 | Avx512), + InstructionSet.X64_AVXVNNIINT16_V512 => (AvxVnniInt16 | Avx10v2), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 1c3396510507b2..ab43a681baf729 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -895,10 +895,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) @@ -1036,10 +1036,10 @@ public static 
InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) @@ -1311,10 +1311,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX)) @@ -1452,10 +1452,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 83dc7b50721dfe..81efa028db7d2a 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -189,8 +189,8 @@ implication ,X86 ,AVX10v2 ,AVX10v1 implication ,X86 ,AVX10v2_V512 ,AVX10v1_V512 implication ,X86 ,AVX10v2 ,AVXVNNIINT8 implication ,X86 ,AVX10v2 ,AVXVNNIINT16 -implication ,X86 ,AVX10v2_V512 ,AVXVNNIINT8_V512 -implication ,X86 ,AVX10v2_V512 ,AVXVNNIINT16_V512 +implication ,X86 ,AVXVNNIINT8_V512 ,AVX10v2_V512 +implication ,X86 ,AVXVNNIINT16_V512 ,AVX10v2_V512 ; These synthetic ISAs need to appear after the core ISAs ; as they depend on the other implications being correct first diff --git 
a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index db42dfbf8b4344..7ffc03e42c4a27 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -370,8 +370,6 @@ int minipal_getcpufeatures(void) if (avx10Version >= 2) // Avx10.2 { result |= XArchIntrinsicConstants_Avx10v2; - result |= XArchIntrinsicConstants_AvxVnniInt8; // AvxVnniInt8 - result |= XArchIntrinsicConstants_AvxVnniInt16; // AvxVnniInt16 } // We assume that the Avx10/V512 support can be inferred from diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 121ddd80decbf7..ac391277ffce5f 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -1885,13 +1885,13 @@ (string templateFileName, Dictionary templateData)[] AvxVnniInt8_V512Inputs = new [] { - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 
3]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i 
+ 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + 
(int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", 
["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", 
["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", 
["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 
0, (long)uint.MaxValue)"}), }; (string templateFileName, Dictionary templateData)[] AvxVnniInt16Inputs = new [] @@ -1913,13 +1913,13 @@ (string templateFileName, Dictionary templateData)[] AvxVnniInt16_V512Inputs = new [] { - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), - 
("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", 
["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", 
["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = 
"TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = 
"TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), }; (string templateFileName, Dictionary templateData)[] Avx512F_ScalarUpperInputs = new [] @@ -3805,59 +3805,59 @@ bool isImmTemplate(string name) string outputDirectory = args[2]; string testListFileName = args[3]; -// ProcessInputs("Sse1", Sse1Inputs); -// ProcessInputs("Sse1.X64", Sse1X64Inputs); -// ProcessInputs("Sse2", Sse2Inputs); -// ProcessInputs("Sse2.X64", Sse2X64Inputs); -// ProcessInputs("Sse3", Sse3Inputs); -// ProcessInputs("Ssse3", Ssse3Inputs); -// ProcessInputs("Sse41", Sse41Inputs); -// ProcessInputs("Sse41_Overloaded", 
Sse41_OverloadedInputs); -// ProcessInputs("Sse41.X64", Sse41X64Inputs); -// ProcessInputs("Sse42", Sse42Inputs); -// ProcessInputs("Avx1", Avx1Inputs); -// ProcessInputs("Avx1_Vector128", Avx1_Vector128Inputs); -// ProcessInputs("Avx2", Avx2Inputs); -// ProcessInputs("Avx2_Vector128", Avx2_Vector128Inputs); -// ProcessInputs("Avx512F", Avx512FInputs); -// ProcessInputs("Avx512F_ScalarUpper", Avx512F_ScalarUpperInputs); -// ProcessInputs("Avx512F_VL_Vector128", Avx512F_VL_Vector128Inputs); -// ProcessInputs("Avx512F_VL_Vector256", Avx512F_VL_Vector256Inputs); -// ProcessInputs("Avx512F_X64", Avx512F_X64Inputs); -// ProcessInputs("Avx512BW", Avx512BWInputs); -// ProcessInputs("Avx512BW_VL_Vector128", Avx512BW_VL_Vector128Inputs); -// ProcessInputs("Avx512BW_VL_Vector256", Avx512BW_VL_Vector256Inputs); -// ProcessInputs("Avx512CD", Avx512CDInputs); -// ProcessInputs("Avx512CD_VL_Vector128", Avx512CD_VL_Vector128Inputs); -// ProcessInputs("Avx512CD_VL_Vector256", Avx512CD_VL_Vector256Inputs); -// ProcessInputs("Avx512DQ", Avx512DQInputs); -// ProcessInputs("Avx512DQ_ScalarUpper", Avx512DQ_ScalarUpperInputs); -// ProcessInputs("Avx512DQ_VL_Vector128", Avx512DQ_VL_Vector128Inputs); -// ProcessInputs("Avx512DQ_VL_Vector256", Avx512DQ_VL_Vector256Inputs); -// ProcessInputs("Avx512Vbmi", Avx512VbmiInputs); -// ProcessInputs("Avx512Vbmi_VL_Vector128", Avx512Vbmi_VL_Vector128Inputs); -// ProcessInputs("Avx512Vbmi_VL_Vector256", Avx512Vbmi_VL_Vector256Inputs); -// ProcessInputs("Avx10v1_ScalarUpper", Avx10v1_ScalarUpperInputs); -// ProcessInputs("Avx10v1_Vector128", Avx10v1_Vector128Inputs); -// ProcessInputs("Avx10v1_Vector256", Avx10v1_Vector256Inputs); -// ProcessInputs("Avx10v1_V512", Avx10v1_V512Inputs); -// ProcessInputs("Fma_Vector128", Fma_Vector128Inputs); -// ProcessInputs("Fma_Vector256", Fma_Vector256Inputs); -// ProcessInputs("Bmi1", Bmi1Inputs); -// ProcessInputs("Bmi1.X64", Bmi1X64Inputs); -// ProcessInputs("Aes", AesInputs); -// ProcessInputs("Pclmulqdq", 
PclmulqdqInputs); -// ProcessInputs("Pclmulqdq.V256", PclmulqdqV256Inputs); -// ProcessInputs("Pclmulqdq.V512", PclmulqdqV512Inputs); -// ProcessInputs("Bmi2", Bmi2Inputs); -// ProcessInputs("Bmi2.X64", Bmi2X64Inputs); -// ProcessInputs("X86Base", X86BaseInputs); -// ProcessInputs("X86Base.X64", X86BaseX64Inputs); -// ProcessInputs("Gfni", GfniInputs); -// ProcessInputs("Gfni.V256", GfniV256Inputs); -// ProcessInputs("Gfni.V512", GfniV512Inputs); -// ProcessInputs("Avx10v2", Avx10v2Inputs); -// ProcessInputs("Avx10v2_V512", Avx10v2_V512Inputs); +ProcessInputs("Sse1", Sse1Inputs); +ProcessInputs("Sse1.X64", Sse1X64Inputs); +ProcessInputs("Sse2", Sse2Inputs); +ProcessInputs("Sse2.X64", Sse2X64Inputs); +ProcessInputs("Sse3", Sse3Inputs); +ProcessInputs("Ssse3", Ssse3Inputs); +ProcessInputs("Sse41", Sse41Inputs); +ProcessInputs("Sse41_Overloaded", Sse41_OverloadedInputs); +ProcessInputs("Sse41.X64", Sse41X64Inputs); +ProcessInputs("Sse42", Sse42Inputs); +ProcessInputs("Avx1", Avx1Inputs); +ProcessInputs("Avx1_Vector128", Avx1_Vector128Inputs); +ProcessInputs("Avx2", Avx2Inputs); +ProcessInputs("Avx2_Vector128", Avx2_Vector128Inputs); +ProcessInputs("Avx512F", Avx512FInputs); +ProcessInputs("Avx512F_ScalarUpper", Avx512F_ScalarUpperInputs); +ProcessInputs("Avx512F_VL_Vector128", Avx512F_VL_Vector128Inputs); +ProcessInputs("Avx512F_VL_Vector256", Avx512F_VL_Vector256Inputs); +ProcessInputs("Avx512F_X64", Avx512F_X64Inputs); +ProcessInputs("Avx512BW", Avx512BWInputs); +ProcessInputs("Avx512BW_VL_Vector128", Avx512BW_VL_Vector128Inputs); +ProcessInputs("Avx512BW_VL_Vector256", Avx512BW_VL_Vector256Inputs); +ProcessInputs("Avx512CD", Avx512CDInputs); +ProcessInputs("Avx512CD_VL_Vector128", Avx512CD_VL_Vector128Inputs); +ProcessInputs("Avx512CD_VL_Vector256", Avx512CD_VL_Vector256Inputs); +ProcessInputs("Avx512DQ", Avx512DQInputs); +ProcessInputs("Avx512DQ_ScalarUpper", Avx512DQ_ScalarUpperInputs); +ProcessInputs("Avx512DQ_VL_Vector128", Avx512DQ_VL_Vector128Inputs); 
+ProcessInputs("Avx512DQ_VL_Vector256", Avx512DQ_VL_Vector256Inputs); +ProcessInputs("Avx512Vbmi", Avx512VbmiInputs); +ProcessInputs("Avx512Vbmi_VL_Vector128", Avx512Vbmi_VL_Vector128Inputs); +ProcessInputs("Avx512Vbmi_VL_Vector256", Avx512Vbmi_VL_Vector256Inputs); +ProcessInputs("Avx10v1_ScalarUpper", Avx10v1_ScalarUpperInputs); +ProcessInputs("Avx10v1_Vector128", Avx10v1_Vector128Inputs); +ProcessInputs("Avx10v1_Vector256", Avx10v1_Vector256Inputs); +ProcessInputs("Avx10v1_V512", Avx10v1_V512Inputs); +ProcessInputs("Fma_Vector128", Fma_Vector128Inputs); +ProcessInputs("Fma_Vector256", Fma_Vector256Inputs); +ProcessInputs("Bmi1", Bmi1Inputs); +ProcessInputs("Bmi1.X64", Bmi1X64Inputs); +ProcessInputs("Aes", AesInputs); +ProcessInputs("Pclmulqdq", PclmulqdqInputs); +ProcessInputs("Pclmulqdq.V256", PclmulqdqV256Inputs); +ProcessInputs("Pclmulqdq.V512", PclmulqdqV512Inputs); +ProcessInputs("Bmi2", Bmi2Inputs); +ProcessInputs("Bmi2.X64", Bmi2X64Inputs); +ProcessInputs("X86Base", X86BaseInputs); +ProcessInputs("X86Base.X64", X86BaseX64Inputs); +ProcessInputs("Gfni", GfniInputs); +ProcessInputs("Gfni.V256", GfniV256Inputs); +ProcessInputs("Gfni.V512", GfniV512Inputs); +ProcessInputs("Avx10v2", Avx10v2Inputs); +ProcessInputs("Avx10v2_V512", Avx10v2_V512Inputs); ProcessInputs("AvxVnniInt8", AvxVnniInt8Inputs); ProcessInputs("AvxVnniInt8_V512", AvxVnniInt8_V512Inputs); ProcessInputs("AvxVnniInt16", AvxVnniInt16Inputs); diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs index 18a547f3046dca..52758b2ffcb2c1 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs @@ -27,6 +27,12 @@ public static Vector256 getAbs256(Vector256 val) return Avx10v2.Abs(val); } + [MethodImplAttribute(MethodImplOptions.NoInlining)] + 
public static Vector128 getMWA(Vector128 v1, Vector128 v2, Vector128 v3) + { + return AvxVnniInt8.MultiplyWideningAndAdd(v1, v2, v3); + } + [Fact] public static unsafe void AvxVnniInt8SampleTest () { @@ -34,6 +40,10 @@ public static unsafe void AvxVnniInt8SampleTest () if (AvxVnniInt8.IsSupported) { Console.WriteLine("AvxVnniInt8 supported"); + Vector128 v1 = Vector128.Create(5); + Vector128 v2 = Vector128.Create(5); + Vector128 v3 = Vector128.Create(5); + v1 = getMWA(v1, v2, v3); } else { Console.WriteLine("AvxVnniInt8 not supported"); From 2a3a10a7dd0c60dafca7010b3efe447846662572 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 28 Apr 2025 23:24:19 -0700 Subject: [PATCH 12/32] Run formatting --- src/coreclr/jit/emitxarch.cpp | 2 +- src/coreclr/jit/hwintrinsic.cpp | 46 +++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 732eef69355e5f..468b09294693da 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -341,7 +341,7 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const case INS_vpdpbuuds: { // Evex versions of AvxVnniInt8 and AvxVnniInt16 will be supported - // with Avx10.2 ISA. + // with Avx10.2 ISA. return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2); } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 50f74255bddfc6..c0d623f27d3590 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2254,10 +2254,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte - : ((op3Type == TYP_UBYTE) ? 
NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte - : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte); + intrinsic = + (op2Type == TYP_UBYTE) + ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte + : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte + : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; @@ -2269,10 +2270,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate - : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate - : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate); + intrinsic = + (op2Type == TYP_UBYTE) + ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate + : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate + : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; @@ -2286,8 +2288,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); intrinsic = (op2Type == TYP_UBYTE) ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate - : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate - : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate); + : ((op3Type == TYP_UBYTE) + ? 
NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate + : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; @@ -2314,10 +2317,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16 - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16 - : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16); + intrinsic = + (op2Type == TYP_SHORT) + ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16 + : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16 + : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; @@ -2329,10 +2333,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate - : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate); + intrinsic = + (op2Type == TYP_SHORT) + ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate + : ((op3Type == TYP_USHORT) ? 
NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate + : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; @@ -2346,8 +2351,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); intrinsic = (op2Type == TYP_SHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate - : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate); + : ((op3Type == TYP_USHORT) + ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate + : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate); retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; From 12d90eb95f289b8a1be1d30e55bfd8f320c98e2d Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 9 May 2025 13:58:59 -0700 Subject: [PATCH 13/32] Disable Avx10.2 smoketest --- .../SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj index e2bf8afb23c146..d56d5147605bcc 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj @@ -19,11 +19,9 @@ /dev/null | grep -q __AVX10v2__; then - echo No support for AVX10v2, test not applicable. - exit 0 - fi +$(CLRTestBatchPreCommands) + echo No support for AVX10v2, test not applicable. 
+ exit /B 0 ]]> From 0863c8df2599e3eea64fa78ba6867beef3caebc1 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 14 May 2025 22:50:32 -0700 Subject: [PATCH 14/32] Fix the Avx10.2 smoketest --- .../nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj index d56d5147605bcc..310ecaf637f353 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj @@ -8,7 +8,6 @@ true true - $(DefineConstants);AVX10v2_INTRINSICS;VECTORT512_INTRINSICS true false From 08f256a74dfb563e4497811e89e6c79fb784bf56 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 14 May 2025 23:33:15 -0700 Subject: [PATCH 15/32] Fix the smoketest --- .../nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj index 310ecaf637f353..4e8381cac23f00 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj @@ -4,10 +4,11 @@ 0 true - true + true true true + $(DefineConstants);AVX10v2_INTRINSICS;VECTORT512_INTRINSICS true false From 31246e5015b775410507d7ebdfc78aeb5ccf7d49 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 28 May 2025 11:24:03 -0700 Subject: [PATCH 16/32] Fix assert and instruction definition to merge with main --- src/coreclr/jit/emitxarch.cpp | 4 +- src/coreclr/jit/hwintrinsiclistxarch.h | 64 +++++++++---------- src/coreclr/jit/instrsxarch.h | 36 ++++------- .../AvxVnniInt8/AvxVnniInt8SampleTest.cs | 4 -- 4 files changed, 47 insertions(+), 61 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp 
b/src/coreclr/jit/emitxarch.cpp index 1a87e64bdcd5b8..926ea1da5f3d4a 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -18142,7 +18142,9 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI { case INS_TT_FULL: { - assert(inputSize == 4 || inputSize == 8); + instruction ins = id->idIns(); + assert((inputSize == 4 || inputSize == 8) || IsAVXVNNIINT8Instruction(ins) || + IsAVXVNNIINT16Instruction(ins)); if (HasEmbeddedBroadcast(id)) { // N = input size in bytes diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index ae5dd75805b350..9ce5a61b689dca 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1492,14 +1492,14 @@ HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT8 Intrinsics #define FIRST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) 
-HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8, 
MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -1508,14 +1508,14 @@ HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT8_V512 Intrinsics #define FIRST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAdd, 64, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByte, 64, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByteSaturate, 64, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByte, 64, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByteSaturate, 64, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByte, 64, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByteSaturate, 64, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAdd, 64, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByte, 64, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByteSaturate, 64, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByte, 64, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByteSaturate, 64, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByte, 64, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByteSaturate, 64, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) 
#define LAST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -1524,14 +1524,14 @@ HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT16 Intrinsics #define FIRST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -1540,14 +1540,14 @@ HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16Saturate, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT8_V512 Intrinsics #define FIRST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVXVNNIINT16_V512 
NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index c94a10e6c3ea39..25b9a53718fe0f 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -947,36 +947,24 @@ INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_ INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single INST3(vmovd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs INST3(vmovw, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, 
BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of 
second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVXVNNIINT16_INSTRUCTION, "FIRST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX 
| Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) 
// Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results INST3(LAST_AVXVNNIINT16_INSTRUCTION, "LAST_AVXVNNIINT16_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) INST3(FIRST_AVXVNNIINT8_INSTRUCTION, "FIRST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", 
IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, 
PSSE38(0xf3, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results INST3(LAST_AVXVNNIINT8_INSTRUCTION, "LAST_AVXVNNIINT8_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs index 52758b2ffcb2c1..551b151f79e056 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs @@ -40,10 +40,6 @@ public static unsafe void AvxVnniInt8SampleTest () if (AvxVnniInt8.IsSupported) { Console.WriteLine("AvxVnniInt8 supported"); - Vector128 v1 = Vector128.Create(5); - Vector128 v2 = Vector128.Create(5); - Vector128 v3 = Vector128.Create(5); - v1 = getMWA(v1, v2, v3); } else { 
Console.WriteLine("AvxVnniInt8 not supported"); From caccae4c966740e0764311fe84e7f2ef9d10e186 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 28 May 2025 12:03:32 -0700 Subject: [PATCH 17/32] Update src/coreclr/inc/clrconfigvalues.h Co-authored-by: Tanner Gooding --- src/coreclr/inc/clrconfigvalues.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 0a0ea6949b3e47..1800b0ca1156dd 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -695,8 +695,8 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512V RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT8, W("EnableAVXVNNIINT8"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT16, W("EnableAVXVNNIINT16"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT8, W("EnableAVXVNNIINT8"), 1, "Allows AVXVNNI8+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT16, W("EnableAVXVNNIINT16"), 1, "Allows AVXVNNI16+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") From 5a42eb2ddf10696de2695c106a61b3cfbf630339 Mon Sep 17 
00:00:00 2001 From: Khushal Modi Date: Thu, 29 May 2025 15:30:25 -0700 Subject: [PATCH 18/32] Fix nativeAOT smoketests and address review comments --- src/coreclr/jit/emitxarch.cpp | 17 +++++++++-------- src/coreclr/jit/emitxarch.h | 1 + .../SmokeTests/HardwareIntrinsics/Program.cs | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 926ea1da5f3d4a..6c232acf3e00bd 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -110,6 +110,11 @@ bool emitter::IsAVXVNNIINT16Instruction(instruction ins) return (ins >= INS_FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT16_INSTRUCTION); } +bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) +{ + return (IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); +} + bool emitter::IsBMIInstruction(instruction ins) { return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION); @@ -10049,8 +10054,7 @@ void emitter::emitIns_SIMD_R_R_R_A(instruction ins, GenTreeIndir* indir, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || - IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIFamilyInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10083,8 +10087,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || - IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIFamilyInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 @@ -10115,8 +10118,7 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, regNumber op3Reg, 
insOpts instOptions) { - if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || - IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)) + if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIFamilyInstruction(ins)) { assert(UseSimdEncoding()); @@ -10203,8 +10205,7 @@ void emitter::emitIns_SIMD_R_R_R_S(instruction ins, int offs, insOpts instOptions) { - assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins) || - IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); + assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIFamilyInstruction(ins)); assert(UseSimdEncoding()); // Ensure we aren't overwriting op2 diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 3d6eaaf78b6fb9..4dad376702566d 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -126,6 +126,7 @@ static bool IsKMOVInstruction(instruction ins); static bool IsAVXVNNIInstruction(instruction ins); static bool IsAVXVNNIINT8Instruction(instruction ins); static bool IsAVXVNNIINT16Instruction(instruction ins); +static bool IsAVXVNNIFamilyInstruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); static bool IsKInstructionWithLBit(instruction ins); diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 61ae0be0d83185..c09a45b114da1a 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -131,6 +131,8 @@ static int Main() bool? ExpectedAvxVnniInt16 = null; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; + bool? ExpectedAvx10v2V512 = false; bool? ExpectedAvx512F = false; bool? ExpectedAvx512BW = false; bool? 
ExpectedAvx512CD = false; From 601a967f6f8fce762f77826dc1476b898c0ec524 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 10 Jun 2025 14:47:50 -0700 Subject: [PATCH 19/32] Resolve conflicts after merge --- src/coreclr/inc/corinfoinstructionset.h | 172 +++++++++----- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 4 + src/coreclr/jit/hwintrinsic.cpp | 4 + src/coreclr/jit/hwintrinsicxarch.cpp | 87 ------- .../Runtime/ReadyToRunInstructionSet.cs | 4 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 12 + .../JitInterface/CorInfoInstructionSet.cs | 218 +++++++++++++----- .../ThunkGenerator/InstructionSetDesc.txt | 8 +- .../SmokeTests/HardwareIntrinsics/Program.cs | 35 +-- 10 files changed, 323 insertions(+), 231 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index b4742cada3b9af..3663c3de5eefb8 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -90,32 +90,38 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_X86Base_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_POPCNT_X64=46, - InstructionSet_AVX_X64=47, - InstructionSet_AVX2_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_AVX512_X64=53, - InstructionSet_AVX512VBMI_X64=54, - InstructionSet_AVX512v3_X64=55, - InstructionSet_AVX10v1_X64=56, - InstructionSet_AVX10v2_X64=57, - InstructionSet_AES_X64=58, - InstructionSet_PCLMULQDQ_X64=59, - InstructionSet_AVX512VP2INTERSECT_X64=60, - InstructionSet_AVXIFMA_X64=61, - InstructionSet_AVXVNNI_X64=62, - InstructionSet_GFNI_X64=63, - InstructionSet_SHA_X64=64, - InstructionSet_WAITPKG_X64=65, - InstructionSet_X86Serialize_X64=66, + 
InstructionSet_AVXVNNIINT8=41, + InstructionSet_AVXVNNIINT8_V512=42, + InstructionSet_AVXVNNIINT16=43, + InstructionSet_AVXVNNIINT16_V512=44, + InstructionSet_X86Base_X64=45, + InstructionSet_SSE3_X64=46, + InstructionSet_SSSE3_X64=47, + InstructionSet_SSE41_X64=48, + InstructionSet_SSE42_X64=49, + InstructionSet_POPCNT_X64=50, + InstructionSet_AVX_X64=51, + InstructionSet_AVX2_X64=52, + InstructionSet_BMI1_X64=53, + InstructionSet_BMI2_X64=54, + InstructionSet_FMA_X64=55, + InstructionSet_LZCNT_X64=56, + InstructionSet_AVX512_X64=57, + InstructionSet_AVX512VBMI_X64=58, + InstructionSet_AVX512v3_X64=59, + InstructionSet_AVX10v1_X64=60, + InstructionSet_AVX10v2_X64=61, + InstructionSet_AES_X64=62, + InstructionSet_PCLMULQDQ_X64=63, + InstructionSet_AVX512VP2INTERSECT_X64=64, + InstructionSet_AVXIFMA_X64=65, + InstructionSet_AVXVNNI_X64=66, + InstructionSet_GFNI_X64=67, + InstructionSet_AVXVNNIINT8_X64=68, + InstructionSet_AVXVNNIINT16_X64=69, + InstructionSet_SHA_X64=70, + InstructionSet_WAITPKG_X64=71, + InstructionSet_X86Serialize_X64=72, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -158,32 +164,38 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_X86Base_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_POPCNT_X64=46, - InstructionSet_AVX_X64=47, - InstructionSet_AVX2_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_AVX512_X64=53, - InstructionSet_AVX512VBMI_X64=54, - InstructionSet_AVX512v3_X64=55, - InstructionSet_AVX10v1_X64=56, - InstructionSet_AVX10v2_X64=57, - InstructionSet_AES_X64=58, - InstructionSet_PCLMULQDQ_X64=59, - InstructionSet_AVX512VP2INTERSECT_X64=60, - InstructionSet_AVXIFMA_X64=61, - InstructionSet_AVXVNNI_X64=62, - 
InstructionSet_GFNI_X64=63, - InstructionSet_SHA_X64=64, - InstructionSet_WAITPKG_X64=65, - InstructionSet_X86Serialize_X64=66, + InstructionSet_AVXVNNIINT8=41, + InstructionSet_AVXVNNIINT8_V512=42, + InstructionSet_AVXVNNIINT16=43, + InstructionSet_AVXVNNIINT16_V512=44, + InstructionSet_X86Base_X64=45, + InstructionSet_SSE3_X64=46, + InstructionSet_SSSE3_X64=47, + InstructionSet_SSE41_X64=48, + InstructionSet_SSE42_X64=49, + InstructionSet_POPCNT_X64=50, + InstructionSet_AVX_X64=51, + InstructionSet_AVX2_X64=52, + InstructionSet_BMI1_X64=53, + InstructionSet_BMI2_X64=54, + InstructionSet_FMA_X64=55, + InstructionSet_LZCNT_X64=56, + InstructionSet_AVX512_X64=57, + InstructionSet_AVX512VBMI_X64=58, + InstructionSet_AVX512v3_X64=59, + InstructionSet_AVX10v1_X64=60, + InstructionSet_AVX10v2_X64=61, + InstructionSet_AES_X64=62, + InstructionSet_PCLMULQDQ_X64=63, + InstructionSet_AVX512VP2INTERSECT_X64=64, + InstructionSet_AVXIFMA_X64=65, + InstructionSet_AVXVNNI_X64=66, + InstructionSet_GFNI_X64=67, + InstructionSet_AVXVNNIINT8_X64=68, + InstructionSet_AVXVNNIINT16_X64=69, + InstructionSet_SHA_X64=70, + InstructionSet_WAITPKG_X64=71, + InstructionSet_X86Serialize_X64=72, #endif // TARGET_X86 }; @@ -355,6 +367,10 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_WAITPKG_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT8)) + AddInstructionSet(InstructionSet_AVXVNNIINT8_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT16)) + AddInstructionSet(InstructionSet_AVXVNNIINT16_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -551,6 +567,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) 
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) @@ -613,6 +637,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) 
resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) @@ -623,6 +651,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_SHA) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SHA); if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) @@ -721,6 +753,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && 
!resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) @@ -731,6 +767,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_SHA) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SHA); if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) @@ -977,6 +1017,18 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_AVXVNNIINT8 : + return "AVXVNNIINT8"; + case InstructionSet_AVXVNNIINT8_X64 : + return "AVXVNNIINT8_X64"; + case InstructionSet_AVXVNNIINT8_V512 : + return "AVXVNNIINT8_V512"; + case InstructionSet_AVXVNNIINT16 : + return "AVXVNNIINT16"; + case InstructionSet_AVXVNNIINT16_X64 : + return "AVXVNNIINT16_X64"; + case InstructionSet_AVXVNNIINT16_V512 : + return "AVXVNNIINT16_V512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1059,6 +1111,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) 
return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_AVXVNNIINT8 : + return "AVXVNNIINT8"; + case InstructionSet_AVXVNNIINT8_V512 : + return "AVXVNNIINT8_V512"; + case InstructionSet_AVXVNNIINT16 : + return "AVXVNNIINT16"; + case InstructionSet_AVXVNNIINT16_V512 : + return "AVXVNNIINT16_V512"; #endif // TARGET_X86 default: @@ -1163,6 +1223,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1228,6 +1292,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index b967f005d93e9f..e0645c2385f0af 100644 --- 
a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 7bb8fde8-cbcb-40ed-b4c2-1a500fc1b595 */ - 0x7bb8fde8, - 0xcbcb, - 0x40ed, - {0xb4, 0xc2, 0x1a, 0x50, 0x0f, 0xc1, 0xb5, 0x95} +constexpr GUID JITEEVersionIdentifier = { /* 79c88673-1495-45cc-881a-74cf6581740b */ + 0x79c88673, + 0x1495, + 0x45cc, + {0x88, 0x1a, 0x74, 0xcf, 0x65, 0x81, 0x74, 0x0b} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 01f92e168c6b39..563f5f697e2be7 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -87,6 +87,10 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx512Vp2intersect_VL=81, READYTORUN_INSTRUCTION_Avx512Vpopcntdq=82, READYTORUN_INSTRUCTION_Avx512Vpopcntdq_VL=83, + READYTORUN_INSTRUCTION_AvxVnniInt8=84, + READYTORUN_INSTRUCTION_AvxVnniInt8_V512=85, + READYTORUN_INSTRUCTION_AvxVnniInt16=86, + READYTORUN_INSTRUCTION_AvxVnniInt16_V512=87, }; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index a18ed14264d886..e4cb2855915021 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -942,6 +942,10 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 + { FIRST_NI_AVXVNNIINT8, LAST_NI_AVXVNNIINT8 }, // AVXVNNIINT8 + { FIRST_NI_AVXVNNIINT8_V512, LAST_NI_AVXVNNIINT8_V512 }, // AVXVNNIINT8V512 + { FIRST_NI_AVXVNNIINT16, LAST_NI_AVXVNNIINT16 }, // AVXVNNIINT16 + { FIRST_NI_AVXVNNIINT16_V512, LAST_NI_AVXVNNIINT16_V512 }, // AVXVNNIINT16V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64 { NI_Illegal, NI_Illegal }, // SSE3_X64 diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 
6da1e13fc6cd7e..13c34fc4d868d6 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -947,93 +947,6 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic // true if isa is supported; otherwise, false bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) { - switch (isa) - { - // These ISAs are fully implemented - case InstructionSet_AES: - case InstructionSet_AES_X64: - case InstructionSet_AVX: - case InstructionSet_AVX_X64: - case InstructionSet_AVX2: - case InstructionSet_AVX2_X64: - case InstructionSet_AVX512F: - case InstructionSet_AVX512F_VL: - case InstructionSet_AVX512F_X64: - case InstructionSet_AVX512BW: - case InstructionSet_AVX512BW_VL: - case InstructionSet_AVX512BW_X64: - case InstructionSet_AVX512CD: - case InstructionSet_AVX512CD_VL: - case InstructionSet_AVX512CD_X64: - case InstructionSet_AVX512DQ: - case InstructionSet_AVX512DQ_VL: - case InstructionSet_AVX512DQ_X64: - case InstructionSet_AVX512VBMI: - case InstructionSet_AVX512VBMI_VL: - case InstructionSet_AVX512VBMI_X64: - case InstructionSet_AVXVNNI: - case InstructionSet_AVXVNNI_X64: - case InstructionSet_BMI1: - case InstructionSet_BMI1_X64: - case InstructionSet_BMI2: - case InstructionSet_BMI2_X64: - case InstructionSet_FMA: - case InstructionSet_FMA_X64: - case InstructionSet_LZCNT: - case InstructionSet_LZCNT_X64: - case InstructionSet_PCLMULQDQ: - case InstructionSet_PCLMULQDQ_X64: - case InstructionSet_PCLMULQDQ_V256: - case InstructionSet_PCLMULQDQ_V512: - case InstructionSet_POPCNT: - case InstructionSet_POPCNT_X64: - case InstructionSet_SSE: - case InstructionSet_SSE_X64: - case InstructionSet_SSE2: - case InstructionSet_SSE2_X64: - case InstructionSet_SSE3: - case InstructionSet_SSE3_X64: - case InstructionSet_SSSE3: - case InstructionSet_SSSE3_X64: - case InstructionSet_SSE41: - case InstructionSet_SSE41_X64: - case InstructionSet_SSE42: - case InstructionSet_SSE42_X64: - case 
InstructionSet_Vector128: - case InstructionSet_Vector256: - case InstructionSet_Vector512: - case InstructionSet_X86Base: - case InstructionSet_X86Base_X64: - case InstructionSet_X86Serialize: - case InstructionSet_X86Serialize_X64: - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v1_V512: - case InstructionSet_AVX10v1_V512_X64: - case InstructionSet_AVX10v2: - case InstructionSet_AVX10v2_X64: - case InstructionSet_AVX10v2_V512: - case InstructionSet_AVX10v2_V512_X64: - case InstructionSet_AVXVNNIINT8: - case InstructionSet_AVXVNNIINT8_X64: - case InstructionSet_AVXVNNIINT8_V512: - case InstructionSet_AVXVNNIINT16: - case InstructionSet_AVXVNNIINT16_X64: - case InstructionSet_AVXVNNIINT16_V512: - case InstructionSet_EVEX: - case InstructionSet_GFNI: - case InstructionSet_GFNI_X64: - case InstructionSet_GFNI_V256: - case InstructionSet_GFNI_V512: - { - return true; - } - - default: - { - return false; - } - } // All ISAs are currently fully implemented return true; } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 47837ff594ce4d..a4a5411bfd04f8 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -90,6 +90,10 @@ public enum ReadyToRunInstructionSet Avx512Vp2intersect_VL=81, Avx512Vpopcntdq=82, Avx512Vpopcntdq_VL=83, + AvxVnniInt8=84, + AvxVnniInt8_V512=85, + AvxVnniInt16=86, + AvxVnniInt16_V512=87, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 8e3abccd0118f2..362b79692e5373 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -137,6 +137,12 @@ public static 
class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X64_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT8_X64: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X64_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X64_AVXVNNIINT16_X64: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X64_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; default: throw new Exception("Unknown instruction set"); } @@ -212,6 +218,12 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X86_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X86_AVXVNNIINT8_X64: return null; + case InstructionSet.X86_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X86_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; + case InstructionSet.X86_AVXVNNIINT16_X64: return null; + case InstructionSet.X86_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 7947bfd1db61ba..e1a6a50ee25ad8 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ 
b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -86,6 +86,10 @@ public enum InstructionSet X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, + X64_AVXVNNIINT8 = InstructionSet_X64.AVXVNNIINT8, + X64_AVXVNNIINT8_V512 = InstructionSet_X64.AVXVNNIINT8_V512, + X64_AVXVNNIINT16 = InstructionSet_X64.AVXVNNIINT16, + X64_AVXVNNIINT16_V512 = InstructionSet_X64.AVXVNNIINT16_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE3_X64 = InstructionSet_X64.SSE3_X64, X64_SSSE3_X64 = InstructionSet_X64.SSSE3_X64, @@ -109,6 +113,8 @@ public enum InstructionSet X64_AVXIFMA_X64 = InstructionSet_X64.AVXIFMA_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, + X64_AVXVNNIINT8_X64 = InstructionSet_X64.AVXVNNIINT8_X64, + X64_AVXVNNIINT16_X64 = InstructionSet_X64.AVXVNNIINT16_X64, X64_SHA_X64 = InstructionSet_X64.SHA_X64, X64_WAITPKG_X64 = InstructionSet_X64.WAITPKG_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, @@ -152,6 +158,10 @@ public enum InstructionSet X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, + X86_AVXVNNIINT8 = InstructionSet_X86.AVXVNNIINT8, + X86_AVXVNNIINT8_V512 = InstructionSet_X86.AVXVNNIINT8_V512, + X86_AVXVNNIINT16 = InstructionSet_X86.AVXVNNIINT16, + X86_AVXVNNIINT16_V512 = InstructionSet_X86.AVXVNNIINT16_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE3_X64 = InstructionSet_X86.SSE3_X64, X86_SSSE3_X64 = InstructionSet_X86.SSSE3_X64, @@ -175,6 +185,8 @@ public enum InstructionSet X86_AVXIFMA_X64 = InstructionSet_X86.AVXIFMA_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, + X86_AVXVNNIINT8_X64 = InstructionSet_X86.AVXVNNIINT8_X64, + X86_AVXVNNIINT16_X64 = InstructionSet_X86.AVXVNNIINT16_X64, X86_SHA_X64 = 
InstructionSet_X86.SHA_X64, X86_WAITPKG_X64 = InstructionSet_X86.WAITPKG_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, @@ -265,32 +277,38 @@ public enum InstructionSet_X64 VectorT128 = 38, VectorT256 = 39, VectorT512 = 40, - X86Base_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - POPCNT_X64 = 46, - AVX_X64 = 47, - AVX2_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - AVX512_X64 = 53, - AVX512VBMI_X64 = 54, - AVX512v3_X64 = 55, - AVX10v1_X64 = 56, - AVX10v2_X64 = 57, - AES_X64 = 58, - PCLMULQDQ_X64 = 59, - AVX512VP2INTERSECT_X64 = 60, - AVXIFMA_X64 = 61, - AVXVNNI_X64 = 62, - GFNI_X64 = 63, - SHA_X64 = 64, - WAITPKG_X64 = 65, - X86Serialize_X64 = 66, + AVXVNNIINT8 = 41, + AVXVNNIINT8_V512 = 42, + AVXVNNIINT16 = 43, + AVXVNNIINT16_V512 = 44, + X86Base_X64 = 45, + SSE3_X64 = 46, + SSSE3_X64 = 47, + SSE41_X64 = 48, + SSE42_X64 = 49, + POPCNT_X64 = 50, + AVX_X64 = 51, + AVX2_X64 = 52, + BMI1_X64 = 53, + BMI2_X64 = 54, + FMA_X64 = 55, + LZCNT_X64 = 56, + AVX512_X64 = 57, + AVX512VBMI_X64 = 58, + AVX512v3_X64 = 59, + AVX10v1_X64 = 60, + AVX10v2_X64 = 61, + AES_X64 = 62, + PCLMULQDQ_X64 = 63, + AVX512VP2INTERSECT_X64 = 64, + AVXIFMA_X64 = 65, + AVXVNNI_X64 = 66, + GFNI_X64 = 67, + AVXVNNIINT8_X64 = 68, + AVXVNNIINT16_X64 = 69, + SHA_X64 = 70, + WAITPKG_X64 = 71, + X86Serialize_X64 = 72, } public enum InstructionSet_X86 @@ -337,32 +355,38 @@ public enum InstructionSet_X86 VectorT128 = 38, VectorT256 = 39, VectorT512 = 40, - X86Base_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - POPCNT_X64 = 46, - AVX_X64 = 47, - AVX2_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - AVX512_X64 = 53, - AVX512VBMI_X64 = 54, - AVX512v3_X64 = 55, - AVX10v1_X64 = 56, - AVX10v2_X64 = 57, - AES_X64 = 58, - PCLMULQDQ_X64 = 59, - AVX512VP2INTERSECT_X64 = 60, - AVXIFMA_X64 = 61, - AVXVNNI_X64 = 62, - GFNI_X64 = 63, - SHA_X64 = 64, - WAITPKG_X64 = 65, - 
X86Serialize_X64 = 66, + AVXVNNIINT8 = 41, + AVXVNNIINT8_V512 = 42, + AVXVNNIINT16 = 43, + AVXVNNIINT16_V512 = 44, + X86Base_X64 = 45, + SSE3_X64 = 46, + SSSE3_X64 = 47, + SSE41_X64 = 48, + SSE42_X64 = 49, + POPCNT_X64 = 50, + AVX_X64 = 51, + AVX2_X64 = 52, + BMI1_X64 = 53, + BMI2_X64 = 54, + FMA_X64 = 55, + LZCNT_X64 = 56, + AVX512_X64 = 57, + AVX512VBMI_X64 = 58, + AVX512v3_X64 = 59, + AVX10v1_X64 = 60, + AVX10v2_X64 = 61, + AES_X64 = 62, + PCLMULQDQ_X64 = 63, + AVX512VP2INTERSECT_X64 = 64, + AVXIFMA_X64 = 65, + AVXVNNI_X64 = 66, + GFNI_X64 = 67, + AVXVNNIINT8_X64 = 68, + AVXVNNIINT16_X64 = 69, + SHA_X64 = 70, + WAITPKG_X64 = 71, + X86Serialize_X64 = 72, } public unsafe struct InstructionSetFlags : IEnumerable @@ -701,6 +725,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3)) @@ -763,6 +795,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) 
resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) resultflags.AddInstructionSet(InstructionSet.X64_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) @@ -773,6 +809,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_SHA)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256)) @@ -872,6 +912,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) resultflags.AddInstructionSet(InstructionSet.X86_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) @@ -882,6 +926,10 @@ public static InstructionSetFlags 
ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_SHA)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256)) @@ -1045,6 +1093,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) @@ -1107,6 +1159,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if 
(resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) @@ -1117,6 +1173,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SHA); if (resultflags.HasInstructionSet(InstructionSet.X64_AES)) @@ -1216,6 +1276,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) @@ -1226,6 +1290,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + 
resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_SHA); if (resultflags.HasInstructionSet(InstructionSet.X86_AES)) @@ -1409,6 +1477,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true); + yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT8, true); + yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT8_V512, true); + yield return new InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT16, true); + yield return new InstructionSetInfo("avxvnniint16_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT16_V512, true); break; case TargetArchitecture.X86: @@ -1478,6 +1550,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true); + yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT8, true); + yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT8_V512, true); + yield return new InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT16, true); + yield return new InstructionSetInfo("avxvnniint16_v512", 
"AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT16_V512, true); break; } } @@ -1566,6 +1642,10 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_WAITPKG_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); break; case TargetArchitecture.X86: @@ -1621,6 +1701,8 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_SHA_X64); AddInstructionSet(InstructionSet.X64_WAITPKG_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); break; case TargetArchitecture.X86: @@ -1650,6 +1732,8 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_SHA_X64); AddInstructionSet(InstructionSet.X86_WAITPKG_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_X64); break; } } @@ -2099,6 +2183,24 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_X86Serialize; } + case "AvxVnniInt8": + if (nestedTypeName == "X64") + { return InstructionSet.X64_AVXVNNIINT8_X64; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT8_V512; } + else + { return InstructionSet.X64_AVXVNNIINT8; } + + case "AvxVnniInt16": + if (nestedTypeName == "X64") + { return InstructionSet.X64_AVXVNNIINT16_X64; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT16_V512; } + else + { 
return InstructionSet.X64_AVXVNNIINT16; } + } break; @@ -2271,6 +2373,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "X86Serialize": { return InstructionSet.X86_X86Serialize; } + case "AvxVnniInt8": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT8_V512; } + else + { return InstructionSet.X86_AVXVNNIINT8; } + + case "AvxVnniInt16": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT16_V512; } + else + { return InstructionSet.X86_AVXVNNIINT16; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index e9840cca3f8ac8..a7df7fbcfa0d76 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -114,10 +114,10 @@ instructionset ,X86 , ,VectorT256 ,40 ,VectorT instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 -instructionset ,X86 ,AvxVnniInt8 , ,83 ,AVXVNNIINT8 ,avxvnniint8 -instructionset ,X86 ,AvxVnniInt8_V512 , ,84 ,AVXVNNIINT8_V512 ,avxvnniint8_v512 -instructionset ,X86 ,AvxVnniInt16 , ,85 ,AVXVNNIINT16 ,avxvnniint16 -instructionset ,X86 ,AvxVnniInt16_V512 , ,86 ,AVXVNNIINT16_V512 ,avxvnniint16_v512 +instructionset ,X86 ,AvxVnniInt8 , ,84 ,AVXVNNIINT8 ,avxvnniint8 +instructionset ,X86 ,AvxVnniInt8_V512 , ,85 ,AVXVNNIINT8_V512 ,avxvnniint8_v512 +instructionset ,X86 ,AvxVnniInt16 , ,86 ,AVXVNNIINT16 ,avxvnniint16 +instructionset ,X86 ,AvxVnniInt16_V512 , ,87 ,AVXVNNIINT16_V512 ,avxvnniint16_v512 ; 64-bit Instruction Sets diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 6bf47578b81f4d..21f3b0f840ec81 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ 
-548,14 +548,6 @@ static int Main() static bool PopcntIsSupported() => Popcnt.IsSupported; static bool PopcntX64IsSupported() => Popcnt.X64.IsSupported; -<<<<<<< HEAD - static bool AvxVnniIsSupported() => AvxVnni.IsSupported; - static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; - static bool AvxVnniInt8IsSupported() => AvxVnniInt8.IsSupported; - static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; - static bool AvxVnniInt16IsSupported() => AvxVnniInt16.IsSupported; - static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; -======= static bool AvxIsSupported() => Avx.IsSupported; static bool AvxX64IsSupported() => Avx.X64.IsSupported; @@ -618,34 +610,12 @@ static int Main() // static bool Avx512Fp16VLIsSupported() => Avx512Fp16.VL.IsSupported; // static bool Avx512Fp16X64IsSupported() => Avx512Fp16.X64.IsSupported; ->>>>>>> origin static bool Avx10v1IsSupported() => Avx10v1.IsSupported; static bool Avx10v1X64IsSupported() => Avx10v1.X64.IsSupported; static bool Avx10v1V512IsSupported() => Avx10v1.V512.IsSupported; static bool Avx10v1V512X64IsSupported() => Avx10v1.V512.X64.IsSupported; -<<<<<<< HEAD static bool Avx10v2IsSupported() => Avx10v2.IsSupported; static bool Avx10v2X64IsSupported() => Avx10v2.X64.IsSupported; - static bool Avx10v2V512IsSupported() => Avx10v2.V512.IsSupported; - static bool Avx10v2V512X64IsSupported() => Avx10v2.V512.X64.IsSupported; - static bool Avx512FIsSupported() => Avx512F.IsSupported; - static bool Avx512FVLIsSupported() => Avx512F.VL.IsSupported; - static bool Avx512FX64IsSupported() => Avx512F.X64.IsSupported; - static bool Avx512BWIsSupported() => Avx512BW.IsSupported; - static bool Avx512BWVLIsSupported() => Avx512BW.VL.IsSupported; - static bool Avx512BWX64IsSupported() => Avx512BW.X64.IsSupported; - static bool Avx512CDIsSupported() => Avx512CD.IsSupported; - static bool Avx512CDVLIsSupported() => Avx512CD.VL.IsSupported; - static bool Avx512CDX64IsSupported() => 
Avx512CD.X64.IsSupported; - static bool Avx512DQIsSupported() => Avx512DQ.IsSupported; - static bool Avx512DQVLIsSupported() => Avx512DQ.VL.IsSupported; - static bool Avx512DQX64IsSupported() => Avx512DQ.X64.IsSupported; - static bool Avx512VbmiIsSupported() => Avx512Vbmi.IsSupported; - static bool Avx512VbmiVLIsSupported() => Avx512Vbmi.VL.IsSupported; - static bool Avx512VbmiX64IsSupported() => Avx512Vbmi.X64.IsSupported; - static bool X86SerializeIsSupported() => X86Serialize.IsSupported; - static bool X86SerializeX64IsSupported() => X86Serialize.X64.IsSupported; -======= static bool AesIsSupported() => Aes.IsSupported; static bool AesX64IsSupported() => Aes.X64.IsSupported; @@ -662,8 +632,11 @@ static int Main() static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; + static bool AvxVnniInt8IsSupported() => AvxVnniInt8.IsSupported; + static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; + static bool AvxVnniInt16IsSupported() => AvxVnniInt16.IsSupported; + static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; ->>>>>>> origin static bool GfniIsSupported() => Gfni.IsSupported; static bool GfniV256IsSupported() => Gfni.V256.IsSupported; static bool GfniV512IsSupported() => Gfni.V512.IsSupported; From 6a65a8bc7527f3f76ccf6b82f68187673949ab78 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 11 Jun 2025 16:07:50 -0700 Subject: [PATCH 20/32] Use 2 bits / ISA to track 3 scenarios 1. AVXVNNIINT8 + AVXVNNIINT16 (VEX) 2. AVXVNNIINT8 + AVXVNNIINT16 (EVEX)3. 
AVXVNNIINT8 + AVXVNNIINT16 (VEX + EVEX) --- src/coreclr/inc/clrconfigvalues.h | 3 +- src/coreclr/inc/corinfoinstructionset.h | 220 +++++----- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/jit/emitxarch.cpp | 8 +- src/coreclr/jit/hwintrinsic.cpp | 394 +++++++++++++----- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 48 +-- src/coreclr/jit/hwintrinsiclistxarch.h | 89 ++-- src/coreclr/jit/hwintrinsicxarch.cpp | 26 +- src/coreclr/jit/jitconfigvalues.h | 3 +- src/coreclr/jit/lowerxarch.cpp | 48 +-- src/coreclr/jit/lsraxarch.cpp | 48 +-- .../Compiler/HardwareIntrinsicHelpers.cs | 27 +- .../Runtime/ReadyToRunInstructionSetHelper.cs | 20 +- .../JitInterface/CorInfoInstructionSet.cs | 274 ++++++------ .../ThunkGenerator/InstructionSetDesc.txt | 19 +- src/coreclr/vm/codeman.cpp | 18 +- src/native/minipal/cpufeatures.c | 9 +- src/native/minipal/cpufeatures.h | 3 +- 18 files changed, 685 insertions(+), 582 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 5f52b8e929a0d5..b721391429c014 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -694,8 +694,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VP2INTERSECT, W("EnableAVX512VP2INTERSECT"), 1, "Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXIFMA, W("EnableAVXIFMA"), 1, "Allows AVXIFMA and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI and dependent hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT8, W("EnableAVXVNNIINT8"), 1, "Allows AVXVNNI8+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT16, W("EnableAVXVNNIINT16"), 1, "Allows AVXVNNI16+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT, 
W("EnableAVXVNNIINT"), 1, "Allows VEX versions (AVXVNNIINT8 & AVXVNNIINT16) hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSHA, W("EnableSHA"), 1, "Allows SHA and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVAES, W("EnableVAES"), 1, "Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 3663c3de5eefb8..9e3c221f1bd0ac 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -90,38 +90,36 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_AVXVNNIINT8=41, - InstructionSet_AVXVNNIINT8_V512=42, - InstructionSet_AVXVNNIINT16=43, - InstructionSet_AVXVNNIINT16_V512=44, - InstructionSet_X86Base_X64=45, - InstructionSet_SSE3_X64=46, - InstructionSet_SSSE3_X64=47, - InstructionSet_SSE41_X64=48, - InstructionSet_SSE42_X64=49, - InstructionSet_POPCNT_X64=50, - InstructionSet_AVX_X64=51, - InstructionSet_AVX2_X64=52, - InstructionSet_BMI1_X64=53, - InstructionSet_BMI2_X64=54, - InstructionSet_FMA_X64=55, - InstructionSet_LZCNT_X64=56, - InstructionSet_AVX512_X64=57, - InstructionSet_AVX512VBMI_X64=58, - InstructionSet_AVX512v3_X64=59, - InstructionSet_AVX10v1_X64=60, - InstructionSet_AVX10v2_X64=61, - InstructionSet_AES_X64=62, - InstructionSet_PCLMULQDQ_X64=63, - InstructionSet_AVX512VP2INTERSECT_X64=64, - InstructionSet_AVXIFMA_X64=65, - InstructionSet_AVXVNNI_X64=66, - InstructionSet_GFNI_X64=67, - InstructionSet_AVXVNNIINT8_X64=68, - InstructionSet_AVXVNNIINT16_X64=69, - InstructionSet_SHA_X64=70, - InstructionSet_WAITPKG_X64=71, - InstructionSet_X86Serialize_X64=72, + InstructionSet_AVXVNNIINT=41, +
InstructionSet_AVXVNNIINT_V512=42, + InstructionSet_X86Base_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_POPCNT_X64=48, + InstructionSet_AVX_X64=49, + InstructionSet_AVX2_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_AVX512_X64=55, + InstructionSet_AVX512VBMI_X64=56, + InstructionSet_AVX512v3_X64=57, + InstructionSet_AVX10v1_X64=58, + InstructionSet_AVX10v2_X64=59, + InstructionSet_AES_X64=60, + InstructionSet_PCLMULQDQ_X64=61, + InstructionSet_AVX512VP2INTERSECT_X64=62, + InstructionSet_AVXIFMA_X64=63, + InstructionSet_AVXVNNI_X64=64, + InstructionSet_GFNI_X64=65, + InstructionSet_AVXVNNIINT_X64=66, + InstructionSet_AVXVNNIINT_V512_X64=67, + InstructionSet_SHA_X64=68, + InstructionSet_WAITPKG_X64=69, + InstructionSet_X86Serialize_X64=70, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -164,38 +162,36 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=38, InstructionSet_VectorT256=39, InstructionSet_VectorT512=40, - InstructionSet_AVXVNNIINT8=41, - InstructionSet_AVXVNNIINT8_V512=42, - InstructionSet_AVXVNNIINT16=43, - InstructionSet_AVXVNNIINT16_V512=44, - InstructionSet_X86Base_X64=45, - InstructionSet_SSE3_X64=46, - InstructionSet_SSSE3_X64=47, - InstructionSet_SSE41_X64=48, - InstructionSet_SSE42_X64=49, - InstructionSet_POPCNT_X64=50, - InstructionSet_AVX_X64=51, - InstructionSet_AVX2_X64=52, - InstructionSet_BMI1_X64=53, - InstructionSet_BMI2_X64=54, - InstructionSet_FMA_X64=55, - InstructionSet_LZCNT_X64=56, - InstructionSet_AVX512_X64=57, - InstructionSet_AVX512VBMI_X64=58, - InstructionSet_AVX512v3_X64=59, - InstructionSet_AVX10v1_X64=60, - InstructionSet_AVX10v2_X64=61, - InstructionSet_AES_X64=62, - InstructionSet_PCLMULQDQ_X64=63, - InstructionSet_AVX512VP2INTERSECT_X64=64, - InstructionSet_AVXIFMA_X64=65, - 
InstructionSet_AVXVNNI_X64=66, - InstructionSet_GFNI_X64=67, - InstructionSet_AVXVNNIINT8_X64=68, - InstructionSet_AVXVNNIINT16_X64=69, - InstructionSet_SHA_X64=70, - InstructionSet_WAITPKG_X64=71, - InstructionSet_X86Serialize_X64=72, + InstructionSet_AVXVNNIINT=41, + InstructionSet_AVXVNNIINT_V512=42, + InstructionSet_X86Base_X64=43, + InstructionSet_SSE3_X64=44, + InstructionSet_SSSE3_X64=45, + InstructionSet_SSE41_X64=46, + InstructionSet_SSE42_X64=47, + InstructionSet_POPCNT_X64=48, + InstructionSet_AVX_X64=49, + InstructionSet_AVX2_X64=50, + InstructionSet_BMI1_X64=51, + InstructionSet_BMI2_X64=52, + InstructionSet_FMA_X64=53, + InstructionSet_LZCNT_X64=54, + InstructionSet_AVX512_X64=55, + InstructionSet_AVX512VBMI_X64=56, + InstructionSet_AVX512v3_X64=57, + InstructionSet_AVX10v1_X64=58, + InstructionSet_AVX10v2_X64=59, + InstructionSet_AES_X64=60, + InstructionSet_PCLMULQDQ_X64=61, + InstructionSet_AVX512VP2INTERSECT_X64=62, + InstructionSet_AVXIFMA_X64=63, + InstructionSet_AVXVNNI_X64=64, + InstructionSet_GFNI_X64=65, + InstructionSet_AVXVNNIINT_X64=66, + InstructionSet_AVXVNNIINT_V512_X64=67, + InstructionSet_SHA_X64=68, + InstructionSet_WAITPKG_X64=69, + InstructionSet_X86Serialize_X64=70, #endif // TARGET_X86 }; @@ -367,10 +363,10 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_WAITPKG_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); - if (HasInstructionSet(InstructionSet_AVXVNNIINT8)) - AddInstructionSet(InstructionSet_AVXVNNIINT8_X64); - if (HasInstructionSet(InstructionSet_AVXVNNIINT16)) - AddInstructionSet(InstructionSet_AVXVNNIINT16_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT)) + AddInstructionSet(InstructionSet_AVXVNNIINT_X64); + if (HasInstructionSet(InstructionSet_AVXVNNIINT_V512)) + AddInstructionSet(InstructionSet_AVXVNNIINT_V512_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -567,14 +563,14 @@ inline 
CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && 
!resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) @@ -637,10 +633,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) @@ -651,10 +643,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_SHA) 
&& !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SHA); if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) @@ -677,6 +665,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -753,10 +745,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet_GFNI) 
&& !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) @@ -767,10 +755,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT8_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT16_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet_SHA) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SHA); if (resultflags.HasInstructionSet(InstructionSet_AES_V256) && !resultflags.HasInstructionSet(InstructionSet_AES)) @@ -793,6 +777,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); if 
(resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -1017,18 +1005,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; - case InstructionSet_AVXVNNIINT8 : - return "AVXVNNIINT8"; - case InstructionSet_AVXVNNIINT8_X64 : - return "AVXVNNIINT8_X64"; - case InstructionSet_AVXVNNIINT8_V512 : - return "AVXVNNIINT8_V512"; - case InstructionSet_AVXVNNIINT16 : - return "AVXVNNIINT16"; - case InstructionSet_AVXVNNIINT16_X64 : - return "AVXVNNIINT16_X64"; - case InstructionSet_AVXVNNIINT16_V512 : - return "AVXVNNIINT16_V512"; + case InstructionSet_AVXVNNIINT : + return "AVXVNNIINT"; + case InstructionSet_AVXVNNIINT_X64 : + return "AVXVNNIINT_X64"; + case InstructionSet_AVXVNNIINT_V512 : + return "AVXVNNIINT_V512"; + case InstructionSet_AVXVNNIINT_V512_X64 : + return "AVXVNNIINT_V512_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1111,14 +1095,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; - case InstructionSet_AVXVNNIINT8 : - return "AVXVNNIINT8"; - case InstructionSet_AVXVNNIINT8_V512 : - return "AVXVNNIINT8_V512"; - case InstructionSet_AVXVNNIINT16 : - return "AVXVNNIINT16"; - case InstructionSet_AVXVNNIINT16_V512 : - return "AVXVNNIINT16_V512"; + case InstructionSet_AVXVNNIINT : + return "AVXVNNIINT"; + case InstructionSet_AVXVNNIINT_V512 : + return "AVXVNNIINT_V512"; #endif // TARGET_X86 default: @@ -1223,10 +1203,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case 
READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; - case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; - case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; - case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; - case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1292,10 +1272,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; - case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT8; - case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT8_V512; - case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT16; - case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT16_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h 
b/src/coreclr/inc/jiteeversionguid.h index e0645c2385f0af..f397ec09bcbb14 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 79c88673-1495-45cc-881a-74cf6581740b */ - 0x79c88673, - 0x1495, - 0x45cc, - {0x88, 0x1a, 0x74, 0xcf, 0x65, 0x81, 0x74, 0x0b} +constexpr GUID JITEEVersionIdentifier = { /* 167fd9b6-7a9b-45ca-825f-d77ac79da237 */ + 0x167fd9b6, + 0x7a9b, + 0x45ca, + {0x82, 0x5f, 0xd7, 0x7a, 0xc7, 0x9d, 0xa2, 0x37} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index f1fa4da60fa7b9..1f80d9d09e8be7 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2965,8 +2965,8 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co { // no simd prefix for EVEX2 - AVX10.2 and above assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || - emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || - emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512)); } else if (isPrefix(sizePrefix)) { @@ -3181,8 +3181,8 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con { // no simd prefix for Avx-Vnni-Int* ISAs subset of instructions // INS_vpdpbuud[,s], INS_vpdpwuud[,s] - assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT8) || - emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT16)); + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512)); } else if (isPrefix(sizePrefix)) { diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index e4cb2855915021..08961ac0442e3c 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ 
b/src/coreclr/jit/hwintrinsic.cpp @@ -942,10 +942,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 - { FIRST_NI_AVXVNNIINT8, LAST_NI_AVXVNNIINT8 }, // AVXVNNIINT8 - { FIRST_NI_AVXVNNIINT8_V512, LAST_NI_AVXVNNIINT8_V512 }, // AVXVNNIINT8V512 - { FIRST_NI_AVXVNNIINT16, LAST_NI_AVXVNNIINT16 }, // AVXVNNIINT16 - { FIRST_NI_AVXVNNIINT16_V512, LAST_NI_AVXVNNIINT16_V512 }, // AVXVNNIINT16V512 + { FIRST_NI_AVXVNNIINT, LAST_NI_AVXVNNIINT }, // AVXVNNIINT + { FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 }, // AVXVNNIINT_V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64 { NI_Illegal, NI_Illegal }, // SSE3_X64 @@ -970,6 +968,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // AVXIFMA_X64 { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 { NI_Illegal, NI_Illegal }, // GFNI_X64 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT_X64 + { NI_Illegal, NI_Illegal }, // AVXVNNIINT_V512_X64 { NI_Illegal, NI_Illegal }, // SHA_X64 { NI_Illegal, NI_Illegal }, // WAITPKG_X64 { NI_Illegal, NI_Illegal }, // X86Serialize_X64 @@ -1207,6 +1207,26 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, CORINFO_InstructionSet isa = lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName); +#ifdef TARGET_XARCH + // This handling makes sure that if we dont have VEX version of AVXVNNIINT instructions, + // we try to use the EVEX version. + // AVXVNNIINT tracks the VEX instructions where as + // AVXVNNIINT_V512 tracks the EVEX versions of same instructions. 
+ if (isa == InstructionSet_AVXVNNIINT || isa == InstructionSet_AVXVNNIINT_X64) + { + if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) + { + if (isa == InstructionSet_AVXVNNIINT) + { + isa = InstructionSet_AVXVNNIINT_V512; + } + else + { + isa = InstructionSet_AVXVNNIINT_V512_X64; + } + } + } +#endif if (isa == InstructionSet_ILLEGAL) { return NI_Illegal; @@ -2403,127 +2423,305 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { #if defined(TARGET_XARCH) - case NI_AVXVNNIINT8_MultiplyWideningAndAdd: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || - (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte - : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte - : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte); - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_MultiplyWideningAndAdd: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || - (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = - (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte - : ((op3Type == TYP_UBYTE) ?
NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte - : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte); - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || - (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = - (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate - : ((op3Type == TYP_UBYTE) ? NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate - : NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate); - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_BYTE && (op3Type == TYP_UBYTE || op3Type == TYP_BYTE)) || - (op2Type == TYP_UBYTE && op3Type == TYP_UBYTE)); - intrinsic = (op2Type == TYP_UBYTE) - ? NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate - : ((op3Type == TYP_UBYTE) - ? 
NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate - : NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate); - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT16_MultiplyWideningAndAdd: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || - (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16 - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16 - : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16); + switch (op2Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddByteByte; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte; + break; + } + + case TYP_BYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16; + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: { var_types op2Type = 
JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || - (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = - (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16 - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16 - : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16); + switch (op2Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate; + break; + } + + case TYP_BYTE: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate; + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } - case NI_AVXVNNIINT16_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || - (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = 
- (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate - : ((op3Type == TYP_USHORT) ? NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate - : NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate); + switch (op2Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte; + break; + } + + case TYP_BYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16; + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: { var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - assert((op2Type == TYP_USHORT && (op3Type == TYP_USHORT || op3Type == TYP_SHORT)) || - (op2Type == TYP_SHORT && op3Type == TYP_USHORT)); - intrinsic = (op2Type == TYP_SHORT) - ? NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate - : ((op3Type == TYP_USHORT) - ? 
NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate - : NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate); + switch (op2Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate; + break; + } + + case TYP_BYTE: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate; + break; + } + + case TYP_SHORT: + { + intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } retNode = gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 3536bf5ca4a1bf..2378b89c8868a9 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -851,30 +851,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: 
- case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: + case 
NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: { assert(targetReg != REG_NA); assert(op1Reg != REG_NA); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 5efb00f2c09afd..81bcb17373fbf9 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1052,65 +1052,46 @@ HARDWARE_INTRINSIC(AVX10v2, StoreScalar, // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVXVNNIINT8 Intrinsics -#define FIRST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT8 NI_AVXVNNIINT8_MultiplyWideningAndAddSaturate +// AVXVNNIINT Intrinsics +#define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, 
INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +#define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVXVNNIINT8_V512 Intrinsics -#define FIRST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAdd, 64, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByte, 64, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddByteByteSaturate, 64, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, 
MultiplyWideningAndAddSByteByte, 64, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteByteSaturate, 64, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByte, 64, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSByteSByteSaturate, 64, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT8_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT8_V512 NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, 
TYP_DOUBLE} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT16 Intrinsics -#define FIRST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT16 NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// 
AVXVNNIINT8_V512 Intrinsics -#define FIRST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16Int16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(AVXVNNIINT16_V512, MultiplyWideningAndAddUInt16UInt16Saturate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT16_V512 NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate +#define FIRST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, 
MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +#define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 13c34fc4d868d6..5011d828da109a 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -62,10 +62,10 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVXIFMA_X64; case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; - case InstructionSet_AVXVNNIINT8: - return InstructionSet_AVXVNNIINT8_X64; - case InstructionSet_AVXVNNIINT16: - return InstructionSet_AVXVNNIINT16_X64; + case InstructionSet_AVXVNNIINT: + return InstructionSet_AVXVNNIINT_X64; + case InstructionSet_AVXVNNIINT_V512: + return InstructionSet_AVXVNNIINT_V512_X64; case InstructionSet_GFNI: return InstructionSet_GFNI_X64; case InstructionSet_SHA: @@ -177,14 +177,10 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_PCLMULQDQ_V512; } - case InstructionSet_AVXVNNIINT8: + case InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: { - return InstructionSet_AVXVNNIINT8_V512; - } - - case InstructionSet_AVXVNNIINT16: - { - return 
InstructionSet_AVXVNNIINT16_V512; + return InstructionSet_AVXVNNIINT_V512; } default: @@ -231,13 +227,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } else if (strncmp(className + 3, "VnniInt", 7) == 0) { - if (strcmp(className + 10, "8") == 0) - { - return InstructionSet_AVXVNNIINT8; - } - else if (strcmp(className + 10, "16") == 0) + if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) { - return InstructionSet_AVXVNNIINT16; + return InstructionSet_AVXVNNIINT; } } else if (strcmp(className + 3, "2") == 0) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index cfaebd218f21da..b9b610594184ca 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -414,8 +414,7 @@ RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", RELEASE_CONFIG_INTEGER(EnableAVX512VP2INTERSECT, "EnableAVX512VP2INTERSECT", 1) // Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXIFMA, "EnableAVXIFMA", 1) // Allows AVXIFMA and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI and dependent hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT8, "EnableAVXVNNIINT8", 1) // Allows AVXVNNI+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT16, "EnableAVXVNNIINT16", 1) // Allows AVXVNNI+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT, "EnableAVXVNNIINT", 1) // Allows VEX AVXVNNIINT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableGFNI, "EnableGFNI", 1) // Allows GFNI and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSHA, "EnableSHA", 1) // Allows SHA and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableVAES, "EnableVAES", 1) // Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled diff --git 
a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 602bd962bada5d..579ff37c0c0155 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10493,30 +10493,30 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: + case 
NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: { TryMakeSrcContainedOrRegOptional(node, op3); break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 44d6176846274e..e2602cc1e6cf36 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2753,30 +2753,30 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByte: - case 
NI_AVXVNNIINT8_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT8_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT16_V512_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: + case 
NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: { assert(numArgs == 3); diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 8f37d1185ff089..bbaa8102b558ae 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -81,8 +81,7 @@ private static class XArchIntrinsicConstants public const int Vaes = (1 << 15); public const int WaitPkg = (1 << 16); public const int X86Serialize = (1 << 17); - public const int AvxVnniInt8 = (1 << 18); - public const int AvxVnniInt16 = (1 << 19); + public const int AvxVnniInt = (1 << 18); public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -115,15 +114,13 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) if ((flags & Avx10v1) != 0) builder.AddSupportedInstructionSet("avx10v1"); if ((flags & Avx10v2) != 0) + { builder.AddSupportedInstructionSet("avx10v2"); - if ((flags & AvxVnniInt8) != 0) - builder.AddSupportedInstructionSet("avxvnniint8"); - if (((flags & AvxVnniInt8) != 0) && ((flags & Avx512) != 0)) - 
builder.AddSupportedInstructionSet("avxvnniint8_v512"); - if ((flags & AvxVnniInt16) != 0) - builder.AddSupportedInstructionSet("avxvnniint16"); - if (((flags & AvxVnniInt16) != 0) && ((flags & Avx512) != 0)) - builder.AddSupportedInstructionSet("avxvnniint16_v512"); + builder.AddSupportedInstructionSet("avxvnniint"); + builder.AddSupportedInstructionSet("avxvnniint_v512"); + } + if ((flags & AvxVnniInt) != 0) + builder.AddSupportedInstructionSet("avxvnniint"); if ((flags & Apx) != 0) builder.AddSupportedInstructionSet("apx"); @@ -239,12 +236,10 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_GFNI_X64 => Gfni, InstructionSet.X64_GFNI_V256 => (Gfni | Avx), InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), - InstructionSet.X64_AVXVNNIINT8 => AvxVnniInt8, - InstructionSet.X64_AVXVNNIINT8_X64 => AvxVnniInt8, - InstructionSet.X64_AVXVNNIINT8_V512 => (AvxVnniInt8 | Avx10v2), - InstructionSet.X64_AVXVNNIINT16 => AvxVnniInt16, - InstructionSet.X64_AVXVNNIINT16_X64 => AvxVnniInt16, - InstructionSet.X64_AVXVNNIINT16_V512 => (AvxVnniInt16 | Avx10v2), + InstructionSet.X64_AVXVNNIINT => AvxVnniInt, + InstructionSet.X64_AVXVNNIINT_X64 => AvxVnniInt, + InstructionSet.X64_AVXVNNIINT_V512 => Avx10v2, + InstructionSet.X64_AVXVNNIINT_V512_X64 => Avx10v2, InstructionSet.X64_SHA => Sha, InstructionSet.X64_SHA_X64 => Sha, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 362b79692e5373..560e10513d4940 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -137,12 +137,10 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case 
InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; - case InstructionSet.X64_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; - case InstructionSet.X64_AVXVNNIINT8_X64: return ReadyToRunInstructionSet.AvxVnniInt8; - case InstructionSet.X64_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; - case InstructionSet.X64_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; - case InstructionSet.X64_AVXVNNIINT16_X64: return ReadyToRunInstructionSet.AvxVnniInt16; - case InstructionSet.X64_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; + case InstructionSet.X64_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT_X64: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X64_AVXVNNIINT_V512_X64: return ReadyToRunInstructionSet.AvxVnniInt8_V512; default: throw new Exception("Unknown instruction set"); } @@ -218,12 +216,10 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; - case InstructionSet.X86_AVXVNNIINT8: return ReadyToRunInstructionSet.AvxVnniInt8; - case InstructionSet.X86_AVXVNNIINT8_X64: return null; - case InstructionSet.X86_AVXVNNIINT8_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; - case InstructionSet.X86_AVXVNNIINT16: return ReadyToRunInstructionSet.AvxVnniInt16; - case InstructionSet.X86_AVXVNNIINT16_X64: return null; - case InstructionSet.X86_AVXVNNIINT16_V512: return ReadyToRunInstructionSet.AvxVnniInt16_V512; + case InstructionSet.X86_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X86_AVXVNNIINT_X64: return null; + case InstructionSet.X86_AVXVNNIINT_V512: 
return ReadyToRunInstructionSet.AvxVnniInt8_V512; + case InstructionSet.X86_AVXVNNIINT_V512_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index e1a6a50ee25ad8..5ceaba283d9c65 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -86,10 +86,8 @@ public enum InstructionSet X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, - X64_AVXVNNIINT8 = InstructionSet_X64.AVXVNNIINT8, - X64_AVXVNNIINT8_V512 = InstructionSet_X64.AVXVNNIINT8_V512, - X64_AVXVNNIINT16 = InstructionSet_X64.AVXVNNIINT16, - X64_AVXVNNIINT16_V512 = InstructionSet_X64.AVXVNNIINT16_V512, + X64_AVXVNNIINT = InstructionSet_X64.AVXVNNIINT, + X64_AVXVNNIINT_V512 = InstructionSet_X64.AVXVNNIINT_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE3_X64 = InstructionSet_X64.SSE3_X64, X64_SSSE3_X64 = InstructionSet_X64.SSSE3_X64, @@ -113,8 +111,8 @@ public enum InstructionSet X64_AVXIFMA_X64 = InstructionSet_X64.AVXIFMA_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, - X64_AVXVNNIINT8_X64 = InstructionSet_X64.AVXVNNIINT8_X64, - X64_AVXVNNIINT16_X64 = InstructionSet_X64.AVXVNNIINT16_X64, + X64_AVXVNNIINT_X64 = InstructionSet_X64.AVXVNNIINT_X64, + X64_AVXVNNIINT_V512_X64 = InstructionSet_X64.AVXVNNIINT_V512_X64, X64_SHA_X64 = InstructionSet_X64.SHA_X64, X64_WAITPKG_X64 = InstructionSet_X64.WAITPKG_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, @@ -158,10 +156,8 @@ public enum InstructionSet X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, - X86_AVXVNNIINT8 = InstructionSet_X86.AVXVNNIINT8, 
- X86_AVXVNNIINT8_V512 = InstructionSet_X86.AVXVNNIINT8_V512, - X86_AVXVNNIINT16 = InstructionSet_X86.AVXVNNIINT16, - X86_AVXVNNIINT16_V512 = InstructionSet_X86.AVXVNNIINT16_V512, + X86_AVXVNNIINT = InstructionSet_X86.AVXVNNIINT, + X86_AVXVNNIINT_V512 = InstructionSet_X86.AVXVNNIINT_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE3_X64 = InstructionSet_X86.SSE3_X64, X86_SSSE3_X64 = InstructionSet_X86.SSSE3_X64, @@ -185,8 +181,8 @@ public enum InstructionSet X86_AVXIFMA_X64 = InstructionSet_X86.AVXIFMA_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, - X86_AVXVNNIINT8_X64 = InstructionSet_X86.AVXVNNIINT8_X64, - X86_AVXVNNIINT16_X64 = InstructionSet_X86.AVXVNNIINT16_X64, + X86_AVXVNNIINT_X64 = InstructionSet_X86.AVXVNNIINT_X64, + X86_AVXVNNIINT_V512_X64 = InstructionSet_X86.AVXVNNIINT_V512_X64, X86_SHA_X64 = InstructionSet_X86.SHA_X64, X86_WAITPKG_X64 = InstructionSet_X86.WAITPKG_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, @@ -277,38 +273,36 @@ public enum InstructionSet_X64 VectorT128 = 38, VectorT256 = 39, VectorT512 = 40, - AVXVNNIINT8 = 41, - AVXVNNIINT8_V512 = 42, - AVXVNNIINT16 = 43, - AVXVNNIINT16_V512 = 44, - X86Base_X64 = 45, - SSE3_X64 = 46, - SSSE3_X64 = 47, - SSE41_X64 = 48, - SSE42_X64 = 49, - POPCNT_X64 = 50, - AVX_X64 = 51, - AVX2_X64 = 52, - BMI1_X64 = 53, - BMI2_X64 = 54, - FMA_X64 = 55, - LZCNT_X64 = 56, - AVX512_X64 = 57, - AVX512VBMI_X64 = 58, - AVX512v3_X64 = 59, - AVX10v1_X64 = 60, - AVX10v2_X64 = 61, - AES_X64 = 62, - PCLMULQDQ_X64 = 63, - AVX512VP2INTERSECT_X64 = 64, - AVXIFMA_X64 = 65, - AVXVNNI_X64 = 66, - GFNI_X64 = 67, - AVXVNNIINT8_X64 = 68, - AVXVNNIINT16_X64 = 69, - SHA_X64 = 70, - WAITPKG_X64 = 71, - X86Serialize_X64 = 72, + AVXVNNIINT = 41, + AVXVNNIINT_V512 = 42, + X86Base_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + POPCNT_X64 = 48, + AVX_X64 = 49, + AVX2_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 
53, + LZCNT_X64 = 54, + AVX512_X64 = 55, + AVX512VBMI_X64 = 56, + AVX512v3_X64 = 57, + AVX10v1_X64 = 58, + AVX10v2_X64 = 59, + AES_X64 = 60, + PCLMULQDQ_X64 = 61, + AVX512VP2INTERSECT_X64 = 62, + AVXIFMA_X64 = 63, + AVXVNNI_X64 = 64, + GFNI_X64 = 65, + AVXVNNIINT_X64 = 66, + AVXVNNIINT_V512_X64 = 67, + SHA_X64 = 68, + WAITPKG_X64 = 69, + X86Serialize_X64 = 70, } public enum InstructionSet_X86 @@ -355,38 +349,36 @@ public enum InstructionSet_X86 VectorT128 = 38, VectorT256 = 39, VectorT512 = 40, - AVXVNNIINT8 = 41, - AVXVNNIINT8_V512 = 42, - AVXVNNIINT16 = 43, - AVXVNNIINT16_V512 = 44, - X86Base_X64 = 45, - SSE3_X64 = 46, - SSSE3_X64 = 47, - SSE41_X64 = 48, - SSE42_X64 = 49, - POPCNT_X64 = 50, - AVX_X64 = 51, - AVX2_X64 = 52, - BMI1_X64 = 53, - BMI2_X64 = 54, - FMA_X64 = 55, - LZCNT_X64 = 56, - AVX512_X64 = 57, - AVX512VBMI_X64 = 58, - AVX512v3_X64 = 59, - AVX10v1_X64 = 60, - AVX10v2_X64 = 61, - AES_X64 = 62, - PCLMULQDQ_X64 = 63, - AVX512VP2INTERSECT_X64 = 64, - AVXIFMA_X64 = 65, - AVXVNNI_X64 = 66, - GFNI_X64 = 67, - AVXVNNIINT8_X64 = 68, - AVXVNNIINT16_X64 = 69, - SHA_X64 = 70, - WAITPKG_X64 = 71, - X86Serialize_X64 = 72, + AVXVNNIINT = 41, + AVXVNNIINT_V512 = 42, + X86Base_X64 = 43, + SSE3_X64 = 44, + SSSE3_X64 = 45, + SSE41_X64 = 46, + SSE42_X64 = 47, + POPCNT_X64 = 48, + AVX_X64 = 49, + AVX2_X64 = 50, + BMI1_X64 = 51, + BMI2_X64 = 52, + FMA_X64 = 53, + LZCNT_X64 = 54, + AVX512_X64 = 55, + AVX512VBMI_X64 = 56, + AVX512v3_X64 = 57, + AVX10v1_X64 = 58, + AVX10v2_X64 = 59, + AES_X64 = 60, + PCLMULQDQ_X64 = 61, + AVX512VP2INTERSECT_X64 = 62, + AVXIFMA_X64 = 63, + AVXVNNI_X64 = 64, + GFNI_X64 = 65, + AVXVNNIINT_X64 = 66, + AVXVNNIINT_V512_X64 = 67, + SHA_X64 = 68, + WAITPKG_X64 = 69, + X86Serialize_X64 = 70, } public unsafe struct InstructionSetFlags : IEnumerable @@ -725,14 +717,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if 
(resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3)) @@ -795,10 +787,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if 
(resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) resultflags.AddInstructionSet(InstructionSet.X64_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) @@ -809,10 +797,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_SHA)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_AES_V256)) @@ -835,6 +819,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) @@ -912,10 +900,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8)) - 
resultflags.AddInstructionSet(InstructionSet.X86_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) resultflags.AddInstructionSet(InstructionSet.X86_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) @@ -926,10 +910,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_SHA)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_AES_V256)) @@ -952,6 +932,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) @@ -1093,10 +1077,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe 
resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) @@ -1159,10 +1143,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) @@ -1173,10 +1153,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); - if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SHA); if (resultflags.HasInstructionSet(InstructionSet.X64_AES)) @@ -1199,6 +1175,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) @@ -1276,10 +1256,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVXIFMA); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) @@ -1290,10 +1266,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe 
resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) - resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_SHA); if (resultflags.HasInstructionSet(InstructionSet.X86_AES)) @@ -1316,6 +1288,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) @@ -1477,10 +1453,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true); - yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT8, true); - yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT8_V512, true); - yield return new 
InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT16, true); - yield return new InstructionSetInfo("avxvnniint16_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT16_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT_V512, true); break; case TargetArchitecture.X86: @@ -1550,10 +1526,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true); - yield return new InstructionSetInfo("avxvnniint8", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT8, true); - yield return new InstructionSetInfo("avxvnniint8_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT8_V512, true); - yield return new InstructionSetInfo("avxvnniint16", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT16, true); - yield return new InstructionSetInfo("avxvnniint16_v512", "AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT16_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT_V512, true); break; } } @@ 
-1642,10 +1618,10 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_WAITPKG_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT8)) - AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); - if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT16)) - AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) + AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); break; case TargetArchitecture.X86: @@ -1701,8 +1677,8 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_SHA_X64); AddInstructionSet(InstructionSet.X64_WAITPKG_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - AddInstructionSet(InstructionSet.X64_AVXVNNIINT8_X64); - AddInstructionSet(InstructionSet.X64_AVXVNNIINT16_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); break; case TargetArchitecture.X86: @@ -1732,8 +1708,8 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_SHA_X64); AddInstructionSet(InstructionSet.X86_WAITPKG_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); - AddInstructionSet(InstructionSet.X86_AVXVNNIINT8_X64); - AddInstructionSet(InstructionSet.X86_AVXVNNIINT16_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNIINT_V512_X64); break; } } @@ -2185,21 +2161,27 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "AvxVnniInt8": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVXVNNIINT8_X64; } + { return 
InstructionSet.X64_AVXVNNIINT_X64; } + else + if (nestedTypeName == "V512_X64") + { return InstructionSet.X64_AVXVNNIINT_V512_X64; } else if (nestedTypeName == "V512") - { return InstructionSet.X64_AVXVNNIINT8_V512; } + { return InstructionSet.X64_AVXVNNIINT_V512; } else - { return InstructionSet.X64_AVXVNNIINT8; } + { return InstructionSet.X64_AVXVNNIINT; } case "AvxVnniInt16": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVXVNNIINT16_X64; } + { return InstructionSet.X64_AVXVNNIINT_X64; } + else + if (nestedTypeName == "V512_X64") + { return InstructionSet.X64_AVXVNNIINT_V512_X64; } else if (nestedTypeName == "V512") - { return InstructionSet.X64_AVXVNNIINT16_V512; } + { return InstructionSet.X64_AVXVNNIINT_V512; } else - { return InstructionSet.X64_AVXVNNIINT16; } + { return InstructionSet.X64_AVXVNNIINT; } } break; @@ -2375,15 +2357,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "AvxVnniInt8": if (nestedTypeName == "V512") - { return InstructionSet.X86_AVXVNNIINT8_V512; } + { return InstructionSet.X86_AVXVNNIINT_V512; } else - { return InstructionSet.X86_AVXVNNIINT8; } + { return InstructionSet.X86_AVXVNNIINT; } case "AvxVnniInt16": if (nestedTypeName == "V512") - { return InstructionSet.X86_AVXVNNIINT16_V512; } + { return InstructionSet.X86_AVXVNNIINT_V512; } else - { return InstructionSet.X86_AVXVNNIINT16; } + { return InstructionSet.X86_AVXVNNIINT; } } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index a7df7fbcfa0d76..2b387bb1310f2e 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -114,10 +114,10 @@ instructionset ,X86 , ,VectorT256 ,40 ,VectorT instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 -instructionset ,X86 ,AvxVnniInt8 , ,84 
,AVXVNNIINT8 ,avxvnniint8 -instructionset ,X86 ,AvxVnniInt8_V512 , ,85 ,AVXVNNIINT8_V512 ,avxvnniint8_v512 -instructionset ,X86 ,AvxVnniInt16 , ,86 ,AVXVNNIINT16 ,avxvnniint16 -instructionset ,X86 ,AvxVnniInt16_V512 , ,87 ,AVXVNNIINT16_V512 ,avxvnniint16_v512 +instructionset ,X86 ,AvxVnniInt8 , ,84 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt8_V512 , ,85 ,AVXVNNIINT_V512 ,avxvnniint_v512 +instructionset ,X86 ,AvxVnniInt16 , ,86 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt16_V512 , ,87 ,AVXVNNIINT_V512 ,avxvnniint_v512 ; 64-bit Instruction Sets @@ -152,8 +152,8 @@ instructionset64bit,X86 ,AVX512VP2INTERSECT instructionset64bit,X86 ,AVXIFMA instructionset64bit,X86 ,AVXVNNI instructionset64bit,X86 ,GFNI -instructionset64bit,X86 ,AVXVNNIINT8 -instructionset64bit,X86 ,AVXVNNIINT16 +instructionset64bit,X86 ,AVXVNNIINT +instructionset64bit,X86 ,AVXVNNIINT_V512 instructionset64bit,X86 ,SHA instructionset64bit,X86 ,WAITPKG instructionset64bit,X86 ,X86Serialize @@ -203,8 +203,6 @@ implication ,X86 ,PCLMULQDQ ,AES implication ,X86 ,AVX512VP2INTERSECT ,AVX512 implication ,X86 ,AVXIFMA ,AVX2 implication ,X86 ,AVXVNNI ,AVX2 -implication ,X86 ,AVXVNNIINT8 ,AVX2 -implication ,X86 ,AVXVNNIINT16 ,AVX2 implication ,X86 ,GFNI ,SSE42 implication ,X86 ,GFNI_V256 ,GFNI @@ -212,8 +210,6 @@ implication ,X86 ,GFNI_V256 ,AVX implication ,X86 ,GFNI_V512 ,GFNI implication ,X86 ,GFNI_V512 ,AVX512 -implication ,X86 ,AVXVNNIINT8_V512 ,AVX10v2 -implication ,X86 ,AVXVNNIINT16_V512 ,AVX10v2 implication ,X86 ,SHA ,X86Base implication ,X86 ,AES_V256 ,AES @@ -228,6 +224,9 @@ implication ,X86 ,PCLMULQDQ_V512 ,AES_V512 implication ,X86 ,WAITPKG ,X86Base implication ,X86 ,X86Serialize ,X86Base +implication ,X86 ,AVXVNNIINT ,AVX2 +implication ,X86 ,AVXVNNIINT_V512 ,AVX10v2 + ; These synthetic ISAs need to appear after the core ISAs ; as they depend on the other implications being correct first ; otherwise they may not be disabled if the required isa is disabled diff --git 
a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 9d7d53c934f163..da11f663ea92a7 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1315,6 +1315,7 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Avx10v2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) { CPUCompileFlags.Set(InstructionSet_AVX10v2); + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT_V512); } #if defined(TARGET_AMD64) @@ -1359,22 +1360,9 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_SHA); } - if ((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt8) != 0) + if (((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT)) { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT8)) - { - CPUCompileFlags.Set(InstructionSet_AVXVNNIINT8); - CPUCompileFlags.Set(InstructionSet_AVXVNNIINT8_V512); - } - } - - if ((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt16) != 0) - { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT16)) - { - CPUCompileFlags.Set(InstructionSet_AVXVNNIINT16); - CPUCompileFlags.Set(InstructionSet_AVXVNNIINT16_V512); - } + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT); } if (((cpuFeatures & XArchIntrinsicConstants_Vaes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableVAES)) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 28dc0d79a14eee..c477e3cfbdefba 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -385,14 +385,9 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } - if ((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) // AVX-VNNI-INT8 + if (((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) && ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0)) // AVX-VNNI-INT8 && AVX-VNNI-INT16 { - result |= XArchIntrinsicConstants_AvxVnniInt8; - } - - if ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0) // 
AVX-VNNI-INT16 - { - result |= XArchIntrinsicConstants_AvxVnniInt16; + result |= XArchIntrinsicConstants_AvxVnniInt; } if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0) // AVX-IFMA diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 24b400065f5898..92284d18899d7e 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -29,8 +29,7 @@ #define XArchIntrinsicConstants_Vaes (1 << 15) #define XArchIntrinsicConstants_WaitPkg (1 << 16) #define XArchIntrinsicConstants_X86Serialize (1 << 17) -#define XArchIntrinsicConstants_AvxVnniInt8 (1 << 18) -#define XArchIntrinsicConstants_AvxVnniInt16 (1 << 19) +#define XArchIntrinsicConstants_AvxVnniInt (1 << 18) #endif // HOST_X86 || HOST_AMD64 #if defined(HOST_ARM64) From 8cf1d5fef52880867c6f640d77300716dcd8bca0 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 11 Jun 2025 22:55:26 -0700 Subject: [PATCH 21/32] Enable APIs for AVXVNNIINT and AVXVNNIINT_V512 in JIT --- src/coreclr/jit/emitxarch.cpp | 32 +++++++++++++++++++------- src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++-- src/coreclr/jit/instrsxarch.h | 4 +++- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index cb88c2d8ca844a..19dd6f4c8ec77b 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -92,17 +92,17 @@ bool emitter::IsApxOnlyInstruction(instruction ins) bool emitter::IsAVXVNNIInstruction(instruction ins) { - return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION); + return (ins >= FIRST_AVXVNNI_INSTRUCTION) && (ins <= LAST_AVXVNNI_INSTRUCTION); } bool emitter::IsAVXVNNIINT8Instruction(instruction ins) { - return (ins >= INS_FIRST_AVXVNNIINT8_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT8_INSTRUCTION); + return (ins >= FIRST_AVXVNNIINT8_INSTRUCTION) && (ins <= LAST_AVXVNNIINT8_INSTRUCTION); } bool emitter::IsAVXVNNIINT16Instruction(instruction ins) { - return (ins >= 
INS_FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= INS_LAST_AVXVNNIINT16_INSTRUCTION); + return (ins >= FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= LAST_AVXVNNIINT16_INSTRUCTION); } bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) @@ -132,9 +132,9 @@ bool emitter::Is3OpRmwInstruction(instruction ins) default: { - return ((ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION)) || + return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) || (IsAVXVNNIFamilyInstruction(ins)) || - ((ins >= INS_FIRST_AVXIFMA_INSTRUCTION) && (ins <= INS_LAST_AVXIFMA_INSTRUCTION)); + ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)); } } } @@ -298,6 +298,23 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNI); } + case INS_vpdpwsud: + case INS_vpdpwsuds: + case INS_vpdpwusd: + case INS_vpdpwusds: + case INS_vpdpwuud: + case INS_vpdpwuuds: + case INS_vpdpbssd: + case INS_vpdpbssds: + case INS_vpdpbsud: + case INS_vpdpbsuds: + case INS_vpdpbuud: + case INS_vpdpbuuds: + { + // Vex versions of AvxVnniInt8 + AvxVnniInt16 + return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT); + } + case INS_vpmadd52huq: case INS_vpmadd52luq: { @@ -358,9 +375,8 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const case INS_vpdpbuud: case INS_vpdpbuuds: { - // Evex versions of AvxVnniInt8 and AvxVnniInt16 will be supported - // with Avx10.2 ISA. 
- return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2); + // Evex versions of AvxVnniInt8 + AvxVnniInt16 will be supported + return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT_V512); } case INS_vpdpbusd: diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 8fbb56dfd4b44b..ed26c29f5f5a1c 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1061,7 +1061,7 @@ HARDWARE_INTRINSIC(AVX10v2, StoreScalar, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT16 Intrinsics #define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) @@ -1070,7 +1070,7 @@ HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByte, HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 2ddadfedd25ffe..dd69092e39a413 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1106,7 +1106,9 @@ INST3(vminmaxsd, "minmaxsd", IUM_WR, BAD_CODE, BAD_ INST3(vminmaxss, "minmaxss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x53), INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum/Maximum scalar single INST3(vmovd_simd, "movd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs INST3(vmovw_simd, "movw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs -INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL_MEM, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags +INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference +INST3(vucomxsd, "ucomxsd", 
IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags +INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags #define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpwsud INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results From 04c586a803e52c3bd8fa7d8a2cbc2b753029ae63 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 12 Jun 2025 14:33:43 -0700 Subject: [PATCH 22/32] Resolve CI errors --- src/coreclr/jit/hwintrinsic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index ab04e372293700..55f933d3124854 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1195,7 +1195,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, { if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) { - if (isa = InstructionSet_AVXVNNIINT) + if (isa == InstructionSet_AVXVNNIINT) { isa = InstructionSet_AVXVNNIINT_V512; } From 08393c4e3a1a2e452cb73c9044cb0b9d6f7f8a54 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 13 Jun 2025 16:02:19 -0700 Subject: [PATCH 23/32] Refactor code for better perf and nit reviews --- src/coreclr/inc/jiteeversionguid.h | 10 ++--- src/coreclr/inc/readytoruninstructionset.h | 8 ++-- src/coreclr/jit/emitxarch.cpp | 11 ++--- src/coreclr/jit/emitxarch.h 
| 3 +- src/coreclr/jit/hwintrinsic.cpp | 4 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 34 +-------------- src/coreclr/jit/hwintrinsiclistxarch.h | 20 ++++----- src/coreclr/jit/hwintrinsicxarch.cpp | 21 +++++---- src/coreclr/jit/instrsxarch.h | 35 +++++++-------- src/coreclr/jit/lowerxarch.cpp | 43 ++----------------- .../Common/Compiler/InstructionSetSupport.cs | 7 --- .../tools/Common/InstructionSetHelpers.cs | 6 +-- .../Runtime/ReadyToRunInstructionSet.cs | 8 ++-- .../ThunkGenerator/InstructionSetDesc.txt | 8 ++-- src/native/minipal/cpufeatures.c | 7 ++- 15 files changed, 76 insertions(+), 149 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 36727fa8b91bbb..2281c932fbace5 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 2227cf05-e78b-4c27-83ae-d979c594c112 */ - 0x2227cf05, - 0xe78b, - 0x4c27, - {0x83, 0xae, 0xd9, 0x79, 0xc5, 0x94, 0xc1, 0x12} +constexpr GUID JITEEVersionIdentifier = { /* ee012cc2-8258-4151-9ae7-4598180e92ce */ + 0xee012cc2, + 0x8258, + 0x4151, + {0x9a, 0xe7, 0x45, 0x98, 0x18, 0x0e, 0x92, 0xce} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 563f5f697e2be7..ee9e5fdc443702 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -67,6 +67,10 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Zba=57, READYTORUN_INSTRUCTION_Zbb=58, READYTORUN_INSTRUCTION_Sve2=59, + READYTORUN_INSTRUCTION_AvxVnniInt8=60, + READYTORUN_INSTRUCTION_AvxVnniInt8_V512=61, + READYTORUN_INSTRUCTION_AvxVnniInt16=62, + READYTORUN_INSTRUCTION_AvxVnniInt16_V512=63, READYTORUN_INSTRUCTION_Aes_V256=64, READYTORUN_INSTRUCTION_Aes_V512=65, READYTORUN_INSTRUCTION_AvxIfma=66, @@ -87,10 +91,6 @@ enum ReadyToRunInstructionSet 
READYTORUN_INSTRUCTION_Avx512Vp2intersect_VL=81, READYTORUN_INSTRUCTION_Avx512Vpopcntdq=82, READYTORUN_INSTRUCTION_Avx512Vpopcntdq_VL=83, - READYTORUN_INSTRUCTION_AvxVnniInt8=84, - READYTORUN_INSTRUCTION_AvxVnniInt8_V512=85, - READYTORUN_INSTRUCTION_AvxVnniInt16=86, - READYTORUN_INSTRUCTION_AvxVnniInt16_V512=87, }; diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 19dd6f4c8ec77b..d274a306745c3f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -90,9 +90,11 @@ bool emitter::IsApxOnlyInstruction(instruction ins) return (ins >= FIRST_APX_INSTRUCTION) && (ins <= LAST_APX_INSTRUCTION); } -bool emitter::IsAVXVNNIInstruction(instruction ins) +bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) { - return (ins >= FIRST_AVXVNNI_INSTRUCTION) && (ins <= LAST_AVXVNNI_INSTRUCTION); + return (ins >= FIRST_AVXVNNI_INSTRUCTION && ins <= LAST_AVXVNNI_INSTRUCTION) || + (ins >= FIRST_AVXVNNIINT8_INSTRUCTION && ins <= LAST_AVXVNNIINT8_INSTRUCTION) || + (ins >= FIRST_AVXVNNIINT16_INSTRUCTION && ins <= LAST_AVXVNNIINT16_INSTRUCTION); } bool emitter::IsAVXVNNIINT8Instruction(instruction ins) @@ -105,11 +107,6 @@ bool emitter::IsAVXVNNIINT16Instruction(instruction ins) return (ins >= FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= LAST_AVXVNNIINT16_INSTRUCTION); } -bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) -{ - return (IsAVXVNNIInstruction(ins) || IsAVXVNNIINT8Instruction(ins) || IsAVXVNNIINT16Instruction(ins)); -} - bool emitter::Is3OpRmwInstruction(instruction ins) { switch (ins) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index c397bbb82f2fd6..6970afcc136e7c 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -121,10 +121,9 @@ static bool IsSSEOrAVXInstruction(instruction ins); static bool IsAVXOnlyInstruction(instruction ins); static bool IsAvx512OnlyInstruction(instruction ins); static bool IsKMOVInstruction(instruction ins); -static bool 
IsAVXVNNIInstruction(instruction ins); +static bool IsAVXVNNIFamilyInstruction(instruction ins); static bool IsAVXVNNIINT8Instruction(instruction ins); static bool IsAVXVNNIINT16Instruction(instruction ins); -static bool IsAVXVNNIFamilyInstruction(instruction ins); static bool Is3OpRmwInstruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 55f933d3124854..afd047aa7da488 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -930,8 +930,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 - { FIRST_NI_AVXVNNIINT, LAST_NI_AVXVNNIINT }, // AVXVNNIINT - { FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 }, // AVXVNNIINT_V512 + { FIRST_NI_AVXVNNIINT, LAST_NI_AVXVNNIINT }, // AVXVNNIINT + { FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 }, // AVXVNNIINT_V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64 { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 }, // SSE42_X64 diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 6d27c762e25671..ceccebb5121e04 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -849,45 +849,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVXVNNI_MultiplyWideningAndAdd: - case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: - case 
NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: + default: { + assert(intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512); assert(targetReg != REG_NA); assert(op1Reg != REG_NA); assert(op2Reg != REG_NA); genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); break; - } - - default: - { - unreached(); - break; }; } } diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index ed26c29f5f5a1c..dbb9741d0b6eae 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1055,6 +1055,16 @@ HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Intrinsics for AVXVNNI +#define FIRST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +#define LAST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAddSaturate + // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} @@ -1100,16 +1110,6 @@ HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16UInt16, HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// Intrinsics for AVXVNNI -#define FIRST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -#define LAST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAddSaturate - // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 4c9497c2df7341..11a5fe9a4d9239 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -197,13 +197,6 @@ static 
CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_AVX10v2; } } - else if (strncmp(className + 3, "VnniInt", 7) == 0) - { - if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) - { - return InstructionSet_AVXVNNIINT; - } - } else if (strcmp(className + 3, "2") == 0) { return InstructionSet_AVX2; @@ -270,9 +263,19 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_AVXIFMA; } - else if (strcmp(className + 3, "Vnni") == 0) + else if (strncmp(className + 3, "Vnni", 4) == 0) { - return InstructionSet_AVXVNNI; + if (className[7] == '\0') + { + return InstructionSet_AVXVNNI; + } + else if (strncmp(className + 7, "Int", 3) == 0) + { + if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) + { + return InstructionSet_AVXVNNIINT; + } + } } } } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index dd69092e39a413..3f4d926bf8fa7d 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -631,6 +631,24 @@ INST3(vpdpwssd, "pdpwssd", IUM_RW, BAD_CODE, BAD_CODE, INST3(vpdpwssds, "pdpwssds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x53), INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation #define LAST_AVXVNNI_INSTRUCTION INS_vpdpwssds +#define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpwsud +INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with 
individual words of second source operand and add the results +INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +#define LAST_AVXVNNIINT8_INSTRUCTION INS_vpdpwuuds + +#define FIRST_AVXVNNIINT16_INSTRUCTION INS_vpdpbssd +INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "pdpbsud", IUM_WR, 
BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +#define LAST_AVXVNNIINT16_INSTRUCTION INS_vpdpbuuds + #define FIRST_AVXIFMA_INSTRUCTION INS_vpmadd52huq // Instructions for AVXIFMA INST3(vpmadd52huq, "pmadd52huq", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB5), INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators @@ -1110,23 +1128,6 @@ INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_ INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 
INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags -#define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpwsud -INST3(vpdpwsud, "pdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "pdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "pdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "pdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "pdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "pdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with 
individual words of second source operand and add the results -#define LAST_AVXVNNIINT8_INSTRUCTION INS_vpdpwuuds - -#define FIRST_AVXVNNIINT16_INSTRUCTION INS_vpdpbssd -INST3(vpdpbssd, "pdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "pdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "pdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, KMask_Base4 | REX_W0 |Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -#define 
LAST_AVXVNNIINT16_INSTRUCTION INS_vpdpbuuds #define LAST_AVX512_INSTRUCTION INS_vpdpbuuds // id nm um mr mi rm tt flags diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 0dde10a05d8c4d..2674b48ee81614 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10489,37 +10489,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVXVNNI_MultiplyWideningAndAdd: - case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: - { - 
TryMakeSrcContainedOrRegOptional(node, op3); - break; - } - case NI_AVX2_MultiplyNoFlags: case NI_AVX2_X64_MultiplyNoFlags: { @@ -10575,17 +10544,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_X86Base_DivRem: - case NI_X86Base_X64_DivRem: - { - // DIV only allows divisor (op3) in memory - TryMakeSrcContainedOrRegOptional(node, op3); - break; - } - default: { - unreached(); + assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) || + (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512)); + TryMakeSrcContainedOrRegOptional(node, op3); break; } } diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 8dbc8a824bd909..0c41654a3bdad5 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -336,13 +336,6 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, _supportedInstructionSets.Add("vpclmul_v512"); } - if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx10v2_v512"))) - { - // AvxVnniInt8 and AvxVnniInt16 512 bit should also be enabled - _supportedInstructionSets.Add("avxvnniint8_v512"); - _supportedInstructionSets.Add("avxvnniint16_v512"); - } - if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx"))) { // These ISAs should automatically extend to 256-bit if diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 6f22be85677d8f..0fb2dd0f5c8c81 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -203,8 +203,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxifma"); 
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v256"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256"); @@ -224,8 +223,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512v3"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint8_v512"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint16_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vp2intersect"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index a4a5411bfd04f8..30c9ceeefcab62 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -70,6 +70,10 @@ public enum ReadyToRunInstructionSet Zba=57, Zbb=58, Sve2=59, + AvxVnniInt8=60, + AvxVnniInt8_V512=61, + AvxVnniInt16=62, + AvxVnniInt16_V512=63, Aes_V256=64, Aes_V512=65, AvxIfma=66, @@ -90,10 +94,6 @@ public enum ReadyToRunInstructionSet Avx512Vp2intersect_VL=81, Avx512Vpopcntdq=82, 
Avx512Vpopcntdq_VL=83, - AvxVnniInt8=84, - AvxVnniInt8_V512=85, - AvxVnniInt16=86, - AvxVnniInt16_V512=87, } } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index b21cb83029ba37..5e59f64ce6d0e4 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -114,10 +114,10 @@ instructionset ,X86 , ,VectorT256 ,40 ,VectorT instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 -instructionset ,X86 ,AvxVnniInt8 , ,84 ,AVXVNNIINT ,avxvnniint -instructionset ,X86 ,AvxVnniInt8_V512 , ,85 ,AVXVNNIINT_V512 ,avxvnniint_v512 -instructionset ,X86 ,AvxVnniInt16 , ,86 ,AVXVNNIINT ,avxvnniint -instructionset ,X86 ,AvxVnniInt16_V512 , ,87 ,AVXVNNIINT_V512 ,avxvnniint_v512 +instructionset ,X86 ,AvxVnniInt8 , ,60 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt8_V512 , ,61 ,AVXVNNIINT_V512 ,avxvnniint_v512 +instructionset ,X86 ,AvxVnniInt16 , ,62 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt16_V512 , ,63 ,AVXVNNIINT_V512 ,avxvnniint_v512 ; 64-bit Instruction Sets diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index c477e3cfbdefba..18a7a0e9b4c9a1 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -385,9 +385,12 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } - if (((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) && ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0)) // AVX-VNNI-INT8 && AVX-VNNI-INT16 + if ((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) // AVX-VNNI-INT8 { - result |= XArchIntrinsicConstants_AvxVnniInt; + if ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0) + { + result |= XArchIntrinsicConstants_AvxVnniInt; // AVX-VNNI-INT16 + } } if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0) // AVX-IFMA From 58699a6b83c9ed1a2570826ed452ef0c6dae9cbe Mon Sep 17 
00:00:00 2001 From: Khushal Modi Date: Fri, 13 Jun 2025 16:07:38 -0700 Subject: [PATCH 24/32] update jiteeversionguid --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 38a900d0178d00..312a8cfd871973 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 2004006b-bdff-4357-8e60-3ae950a4f165 */ - 0x2004006b, - 0xbdff, - 0x4357, - {0x8e, 0x60, 0x3a, 0xe9, 0x50, 0xa4, 0xf1, 0x65} +constexpr GUID JITEEVersionIdentifier = { /* a1f8d16e-478c-4c5b-a467-f14a35b48c05 */ + 0xa1f8d16e, + 0x478c, + 0x4c5b, + {0xa4, 0x67, 0xf1, 0x4a, 0x35, 0xb4, 0x8c, 0x05} }; #endif // JIT_EE_VERSIONING_GUID_H From 1eba64dec6fc6aa65cc5e1ce78a059d24fabbf46 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 16 Jun 2025 14:40:58 -0700 Subject: [PATCH 25/32] Use AuxiliaryJitType to track args types for AVX VNNI INT --- src/coreclr/jit/compiler.cpp | 593 ++++++++++++++++++ src/coreclr/jit/compiler.h | 7 + src/coreclr/jit/hwintrinsic.cpp | 326 +--------- src/coreclr/jit/hwintrinsic.h | 15 +- src/coreclr/jit/hwintrinsicarm64.cpp | 157 ----- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 190 +++++- src/coreclr/jit/hwintrinsiclistxarch.h | 42 +- src/coreclr/jit/hwintrinsicxarch.cpp | 485 +------------- src/coreclr/jit/instrsxarch.h | 2 +- src/coreclr/jit/lsraxarch.cpp | 28 +- ...rdwareIntrinsics_X86_AvxVnniInt16_r.csproj | 20 - ...dwareIntrinsics_X86_AvxVnniInt16_ro.csproj | 20 - ...ardwareIntrinsics_X86_AvxVnniInt8_r.csproj | 20 - ...rdwareIntrinsics_X86_AvxVnniInt8_ro.csproj | 20 - .../AvxVnniInt16/AvxVnniInt16SampleTest.cs | 0 .../AvxVnniInt16_handwritten_r.csproj | 0 .../AvxVnniInt16_handwritten_ro.csproj | 0 .../AvxVnniInt16/AvxVnniInt16_r.csproj | 0 .../AvxVnniInt16/AvxVnniInt16_ro.csproj | 0 .../AvxVnniInt16/Program.AvxVnniInt16.cs | 0 
.../AvxVnniInt16_V512SampleTest.cs | 0 .../AvxVnniInt16_V512_handwritten_r.csproj | 0 .../AvxVnniInt16_V512_handwritten_ro.csproj | 0 .../AvxVnniInt16_V512_r.csproj | 0 .../AvxVnniInt16_V512_ro.csproj | 0 .../Program.AvxVnniInt16_V512.cs | 0 .../AvxVnniInt8/AvxVnniInt8SampleTest.cs | 0 .../AvxVnniInt8_handwritten_r.csproj | 0 .../AvxVnniInt8_handwritten_ro.csproj | 0 .../AvxVnniInt8/AvxVnniInt8_r.csproj | 0 .../AvxVnniInt8/AvxVnniInt8_ro.csproj | 0 .../AvxVnniInt8/Program.AvxVnniInt8.cs | 0 .../AvxVnniInt8_V512SampleTest.cs | 0 .../AvxVnniInt8_V512_handwritten_r.csproj | 0 .../AvxVnniInt8_V512_handwritten_ro.csproj | 0 .../AvxVnniInt8_V512_r.csproj | 0 .../AvxVnniInt8_V512_ro.csproj | 0 .../Program.AvxVnniInt8_V512.cs | 0 .../X86_AvxVnniInt16/Directory.Build.props | 10 - .../X86_AvxVnniInt16/Directory.Build.targets | 32 - .../X86_AvxVnniInt8/Directory.Build.props | 10 - .../X86_AvxVnniInt8/Directory.Build.targets | 32 - .../SmokeTests/HardwareIntrinsics/Program.cs | 67 +- 43 files changed, 868 insertions(+), 1208 deletions(-) delete mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj delete mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj delete mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj delete mode 100644 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16/AvxVnniInt16SampleTest.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16/AvxVnniInt16_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => 
X86_Avx}/AvxVnniInt16/AvxVnniInt16_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16/Program.AvxVnniInt16.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt16 => X86_Avx}/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/AvxVnniInt8SampleTest.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/AvxVnniInt8_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/AvxVnniInt8_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8/Program.AvxVnniInt8.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj 
(100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj (100%) rename src/tests/JIT/HardwareIntrinsics/{X86_AvxVnniInt8 => X86_Avx}/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs (100%) delete mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props delete mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets delete mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props delete mode 100644 src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d0d704f02063a2..ddda783b0107d5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -460,6 +460,599 @@ Compiler::Compiler(ArenaAllocator* arena, info.compUsesAsyncContinuation = false; } +#ifdef FEATURE_HW_INTRINSICS +#if defined(TARGET_XARCH) +//------------------------------------------------------------------------ +// X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The 64-bit only InstructionSet associated with isa +static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_X86Base: + return InstructionSet_X86Base_X64; + case InstructionSet_SSE42: + return InstructionSet_SSE42_X64; + case InstructionSet_AVX: + return InstructionSet_AVX_X64; + case InstructionSet_AVX2: + return InstructionSet_AVX2_X64; + case InstructionSet_AVX512: + return InstructionSet_AVX512_X64; + case InstructionSet_AVX512v2: + return InstructionSet_AVX512v2_X64; + case InstructionSet_AVX512v3: + return InstructionSet_AVX512v3_X64; + case InstructionSet_AVX10v1: + return 
InstructionSet_AVX10v1_X64; + case InstructionSet_AVX10v2: + return InstructionSet_AVX10v2_X64; + case InstructionSet_AES: + return InstructionSet_AES_X64; + case InstructionSet_AVX512VP2INTERSECT: + return InstructionSet_AVX512VP2INTERSECT_X64; + case InstructionSet_AVXIFMA: + return InstructionSet_AVXIFMA_X64; + case InstructionSet_AVXVNNI: + return InstructionSet_AVXVNNI_X64; + case InstructionSet_AVXVNNIINT: + return InstructionSet_AVXVNNIINT_X64; + case InstructionSet_AVXVNNIINT_V512: + return InstructionSet_AVXVNNIINT_V512_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_X64; + case InstructionSet_SHA: + return InstructionSet_SHA_X64; + case InstructionSet_WAITPKG: + return InstructionSet_WAITPKG_X64; + case InstructionSet_X86Serialize: + return InstructionSet_X86Serialize_X64; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// VLVersionOfIsa: Gets the corresponding AVX512VL only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The AVX512VL only InstructionSet associated with isa +static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AVX512: + case InstructionSet_AVX512v2: + case InstructionSet_AVX512v3: + case InstructionSet_AVX10v1: + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V256 only InstructionSet associated with isa +static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AES: + { + return InstructionSet_AES_V256; + } + + case 
InstructionSet_GFNI: + { + return InstructionSet_GFNI_V256; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V512 only InstructionSet associated with isa +static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AVX10v1: + case InstructionSet_AVX10v1_X64: + case InstructionSet_AVX10v2: + case InstructionSet_AVX10v2_X64: + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + + case InstructionSet_AES: + { + return InstructionSet_AES_V512; + } + + case InstructionSet_GFNI: + { + return InstructionSet_GFNI_V512; + } + + case InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: + { + return InstructionSet_AVXVNNIINT_V512; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// lookupInstructionSet: Gets the InstructionSet for a given class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// +// Return Value: +// The InstructionSet associated with className +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) +{ + assert(className != nullptr); + + if (className[0] == 'A') + { + if (strcmp(className + 1, "es") == 0) + { + return InstructionSet_AES; + } + else if (strncmp(className + 1, "vx", 2) == 0) + { + if (className[3] == '\0') + { + return InstructionSet_AVX; + } + else if (strncmp(className + 3, "10v", 3) == 0) + { + if (strcmp(className + 6, "1") == 0) + { + return InstructionSet_AVX10v1; + } + else if (strcmp(className + 6, "2") == 0) + { + return InstructionSet_AVX10v2; + } + } + else if (strcmp(className + 3, "2") == 0) + { + 
return InstructionSet_AVX2; + } + else if (strncmp(className + 3, "512", 3) == 0) + { + if (className[6] == 'B') + { + if (strcmp(className + 7, "italg") == 0) + { + return InstructionSet_AVX512v3; + } + else if (strcmp(className + 7, "f16") == 0) + { + return InstructionSet_AVX10v1; + } + else if (strcmp(className + 7, "W") == 0) + { + return InstructionSet_AVX512; + } + } + else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0)) + { + return InstructionSet_AVX512; + } + else if (className[6] == 'F') + { + if (className[7] == '\0') + { + return InstructionSet_AVX512; + } + else if (strcmp(className + 7, "p16") == 0) + { + return InstructionSet_AVX10v1; + } + } + else if (className[6] == 'V') + { + if (strncmp(className + 7, "bmi", 3) == 0) + { + if (className[10] == '\0') + { + return InstructionSet_AVX512v2; + } + else if (strcmp(className + 10, "2") == 0) + { + return InstructionSet_AVX512v3; + } + } + else if (className[7] == 'p') + { + if (strcmp(className + 8, "p2intersect") == 0) + { + return InstructionSet_AVX512VP2INTERSECT; + } + else if (strcmp(className + 8, "opcntdq") == 0) + { + return InstructionSet_AVX512v3; + } + } + } + } + else if (strcmp(className + 3, "Ifma") == 0) + { + return InstructionSet_AVXIFMA; + } + else if (strncmp(className + 3, "Vnni", 4) == 0) + { + if (className[7] == '\0') + { + return InstructionSet_AVXVNNI; + } + else if (strncmp(className + 7, "Int", 3) == 0) + { + if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) + { + if (compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) + { + return InstructionSet_AVXVNNIINT; + } + else + { + return InstructionSet_AVXVNNIINT_V512; + } + } + } + } + } + } + else if (className[0] == 'B') + { + if (strncmp(className + 1, "mi", 2) == 0) + { + if (strcmp(className + 3, "1") == 0) + { + return InstructionSet_AVX2; + } + else if (strcmp(className + 3, "2") == 0) + { + return InstructionSet_AVX2; + } + } + } + else if (className[0] == 
'F') + { + if (strcmp(className + 1, "ma") == 0) + { + return InstructionSet_AVX2; + } + else if (strcmp(className + 1, "16c") == 0) + { + return InstructionSet_AVX2; + } + } + else if (className[0] == 'G') + { + if (strcmp(className + 1, "fni") == 0) + { + return InstructionSet_GFNI; + } + } + else if (className[0] == 'L') + { + if (strcmp(className + 1, "zcnt") == 0) + { + return InstructionSet_AVX2; + } + } + else if (className[0] == 'P') + { + if (strcmp(className + 1, "clmulqdq") == 0) + { + return InstructionSet_AES; + } + else if (strcmp(className + 1, "opcnt") == 0) + { + return InstructionSet_SSE42; + } + } + else if (className[0] == 'S') + { + if (strcmp(className + 1, "ha") == 0) + { + return InstructionSet_SHA; + } + else if (strncmp(className + 1, "se", 2) == 0) + { + if ((className[3] == '\0') || (strcmp(className + 3, "2") == 0)) + { + return InstructionSet_X86Base; + } + else if (strcmp(className + 3, "3") == 0) + { + return InstructionSet_SSE42; + } + else if (strcmp(className + 3, "41") == 0) + { + return InstructionSet_SSE42; + } + else if (strcmp(className + 3, "42") == 0) + { + return InstructionSet_SSE42; + } + } + else if (strcmp(className + 1, "sse3") == 0) + { + return InstructionSet_SSE42; + } + } + else if (className[0] == 'V') + { + if (strncmp(className + 1, "ector", 5) == 0) + { + if (strncmp(className + 6, "128", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector128; + } + } + else if (strncmp(className + 6, "256", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector256; + } + } + else if (strncmp(className + 6, "512", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector512; + } + } + } + else if (strcmp(className + 1, "L") == 0) + { + assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); + return 
InstructionSet_ILLEGAL; + } + } + else if (strcmp(className, "WaitPkg") == 0) + { + return InstructionSet_WAITPKG; + } + else if (strncmp(className, "X86", 3) == 0) + { + if (strcmp(className + 3, "Base") == 0) + { + return InstructionSet_X86Base; + } + else if (strcmp(className + 3, "Serialize") == 0) + { + return InstructionSet_X86Serialize; + } + } + + return InstructionSet_ILLEGAL; +} + +#else // TARGET_ARM64 +//------------------------------------------------------------------------ +// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The 64-bit only InstructionSet associated with isa +static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AdvSimd: + return InstructionSet_AdvSimd_Arm64; + case InstructionSet_Aes: + return InstructionSet_Aes_Arm64; + case InstructionSet_ArmBase: + return InstructionSet_ArmBase_Arm64; + case InstructionSet_Crc32: + return InstructionSet_Crc32_Arm64; + case InstructionSet_Dp: + return InstructionSet_Dp_Arm64; + case InstructionSet_Sha1: + return InstructionSet_Sha1_Arm64; + case InstructionSet_Sha256: + return InstructionSet_Sha256_Arm64; + case InstructionSet_Rdm: + return InstructionSet_Rdm_Arm64; + case InstructionSet_Sve: + return InstructionSet_Sve_Arm64; + case InstructionSet_Sve2: + return InstructionSet_Sve2_Arm64; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// lookupInstructionSet: Gets the InstructionSet for a given class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// +// Return Value: +// The InstructionSet associated with className +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) +{ + assert(className != nullptr); + + if (className[0] == 'A') + { + if 
(strcmp(className, "AdvSimd") == 0) + { + return InstructionSet_AdvSimd; + } + if (strcmp(className, "Aes") == 0) + { + return InstructionSet_Aes; + } + if (strcmp(className, "ArmBase") == 0) + { + return InstructionSet_ArmBase; + } + } + else if (className[0] == 'C') + { + if (strcmp(className, "Crc32") == 0) + { + return InstructionSet_Crc32; + } + } + else if (className[0] == 'D') + { + if (strcmp(className, "Dp") == 0) + { + return InstructionSet_Dp; + } + } + else if (className[0] == 'R') + { + if (strcmp(className, "Rdm") == 0) + { + return InstructionSet_Rdm; + } + } + else if (className[0] == 'S') + { + if (strcmp(className, "Sha1") == 0) + { + return InstructionSet_Sha1; + } + if (strcmp(className, "Sha256") == 0) + { + return InstructionSet_Sha256; + } + if (strcmp(className, "Sve2") == 0) + { + return InstructionSet_Sve2; + } + if (strcmp(className, "Sve") == 0) + { + return InstructionSet_Sve; + } + } + else if (className[0] == 'V') + { + if (strncmp(className, "Vector64", 8) == 0) + { + return InstructionSet_Vector64; + } + else if (strncmp(className, "Vector128", 9) == 0) + { + return InstructionSet_Vector128; + } + } + + return InstructionSet_ILLEGAL; +} +#endif // TARGET_XARCH + +//------------------------------------------------------------------------ +// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// innerEnclosingClassName -- The name of the inner enclosing class of X64 classes +// outerEnclosingClassName -- The name of the outer enclosing class of X64 classes +// +// Return Value: +// The InstructionSet associated with className and enclosingClassName +CORINFO_InstructionSet Compiler::lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName) +{ + assert(className != nullptr); + + if (innerEnclosingClassName == nullptr) + { + // No nested class is the 
most common, so fast path it + return lookupInstructionSet(className); + } + + // Since lookupId is only called for the xplat intrinsics + // or intrinsics in the platform specific namespace, we assume + // that it will be one we can handle and don't try to early out. + + CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); + +#if defined(TARGET_XARCH) + if (className[0] == 'V') + { + if (strcmp(className, "V256") == 0) + { + return V256VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "V512") == 0) + { + return V512VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "VL") == 0) + { + return VLVersionOfIsa(enclosingIsa); + } + } + else if (strcmp(className, "X64") == 0) + { + return X64VersionOfIsa(enclosingIsa); + } +#else // TARGET_ARM64 + if (strcmp(className, "Arm64") == 0) + { + return Arm64VersionOfIsa(enclosingIsa); + } +#endif // TARGET_XARCH + + return InstructionSet_ILLEGAL; +} +#endif // FEATURE_HW_INTRINSICS + //------------------------------------------------------------------------ // getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3d6cd904ffd399..f17ea09051312a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9616,6 +9616,13 @@ class Compiler return false; } +#ifdef FEATURE_HW_INTRINSICS + CORINFO_InstructionSet lookupInstructionSet(const char* className); + CORINFO_InstructionSet lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName); +#endif // FEATURE_HW_INTRINSICS + #ifdef DEBUG // Answer the question: Is a particular ISA supported? 
// Use this api when asking the question so that future diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 837dad875b2ae6..9e9d369615969b 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1184,28 +1184,8 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, return NI_Illegal; } - CORINFO_InstructionSet isa = lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName); + CORINFO_InstructionSet isa = comp->lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName); -#ifdef TARGET_XARCH - // This handling makes sure that if we dont have VEX version of AVXVNNIINT instructions, - // we try to use the EVEX version. - // AVXVNNIINT tracks the VEX instructions where as - // AVXVNNIINT_V512 tracks the EVEX versions of same instructions. - if (isa == InstructionSet_AVXVNNIINT || isa == InstructionSet_AVXVNNIINT_X64) - { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) - { - if (isa == InstructionSet_AVXVNNIINT) - { - isa = InstructionSet_AVXVNNIINT_V512; - } - else - { - isa = InstructionSet_AVXVNNIINT_V512_X64; - } - } - } -#endif if (isa == InstructionSet_ILLEGAL) { return NI_Illegal; @@ -2398,310 +2378,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { #if defined(TARGET_XARCH) - case NI_AVXVNNIINT_MultiplyWideningAndAdd: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - switch (op2Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddByteByte; - break; - } - - case TYP_BYTE: - { - switch (op3Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte; - break; - } - - case TYP_BYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte; - break; - } - - default: - { - unreached(); - } - } - break; - } - - 
case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16; - break; - } - - case TYP_USHORT: - { - switch (op3Type) - { - case TYP_USHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16; - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16; - break; - } - - default: - { - unreached(); - } - } - break; - } - - default: - { - unreached(); - } - } - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - switch (op2Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate; - break; - } - - case TYP_BYTE: - { - switch (op3Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate; - break; - } - - case TYP_BYTE: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate; - break; - } - - default: - { - unreached(); - } - } - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate; - break; - } - - case TYP_USHORT: - { - switch (op3Type) - { - case TYP_USHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate; - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate; - break; - } - - default: - { - unreached(); - } - } - break; - } - - default: - { - unreached(); - } - } - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = 
JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - switch (op2Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte; - break; - } - - case TYP_BYTE: - { - switch (op3Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte; - break; - } - - case TYP_BYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte; - break; - } - - default: - { - unreached(); - } - } - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16; - break; - } - - case TYP_USHORT: - { - switch (op3Type) - { - case TYP_USHORT: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16; - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16; - break; - } - - default: - { - unreached(); - } - } - break; - } - - default: - { - unreached(); - } - } - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: - { - var_types op2Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd)); - var_types op3Type = JitType2PreciseVarType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd)); - switch (op2Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate; - break; - } - - case TYP_BYTE: - { - switch (op3Type) - { - case TYP_UBYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate; - break; - } - - case TYP_BYTE: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate; - break; - } - - default: - { - unreached(); - } - } - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate; - break; - } - - case TYP_USHORT: - { - switch (op3Type) - { - case TYP_USHORT: - { - intrinsic = 
NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate; - break; - } - - case TYP_SHORT: - { - intrinsic = NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate; - break; - } - - default: - { - unreached(); - } - } - break; - } - - default: - { - unreached(); - } - } - retNode = - gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - break; - } - case NI_AVX2_GatherVector128: case NI_AVX2_GatherVector256: assert(varTypeIsSIMD(op2->TypeGet())); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index a0259f32ac0972..86440c9c79cfd3 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -525,15 +525,12 @@ struct HWIntrinsicInfo static const HWIntrinsicInfo& lookup(NamedIntrinsic id); - static NamedIntrinsic lookupId(Compiler* comp, - CORINFO_SIG_INFO* sig, - const char* className, - const char* methodName, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName); - static CORINFO_InstructionSet lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName); + static NamedIntrinsic lookupId(Compiler* comp, + CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName); static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 083311812ecac2..d6cde7841df99e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -5,163 +5,6 @@ #include "hwintrinsic.h" #ifdef FEATURE_HW_INTRINSICS - -//------------------------------------------------------------------------ -// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The 64-bit only 
InstructionSet associated with isa -static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AdvSimd: - return InstructionSet_AdvSimd_Arm64; - case InstructionSet_Aes: - return InstructionSet_Aes_Arm64; - case InstructionSet_ArmBase: - return InstructionSet_ArmBase_Arm64; - case InstructionSet_Crc32: - return InstructionSet_Crc32_Arm64; - case InstructionSet_Dp: - return InstructionSet_Dp_Arm64; - case InstructionSet_Sha1: - return InstructionSet_Sha1_Arm64; - case InstructionSet_Sha256: - return InstructionSet_Sha256_Arm64; - case InstructionSet_Rdm: - return InstructionSet_Rdm_Arm64; - case InstructionSet_Sve: - return InstructionSet_Sve_Arm64; - case InstructionSet_Sve2: - return InstructionSet_Sve2_Arm64; - default: - return InstructionSet_NONE; - } -} - -//------------------------------------------------------------------------ -// lookupInstructionSet: Gets the InstructionSet for a given class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// -// Return Value: -// The InstructionSet associated with className -static CORINFO_InstructionSet lookupInstructionSet(const char* className) -{ - assert(className != nullptr); - - if (className[0] == 'A') - { - if (strcmp(className, "AdvSimd") == 0) - { - return InstructionSet_AdvSimd; - } - if (strcmp(className, "Aes") == 0) - { - return InstructionSet_Aes; - } - if (strcmp(className, "ArmBase") == 0) - { - return InstructionSet_ArmBase; - } - } - else if (className[0] == 'C') - { - if (strcmp(className, "Crc32") == 0) - { - return InstructionSet_Crc32; - } - } - else if (className[0] == 'D') - { - if (strcmp(className, "Dp") == 0) - { - return InstructionSet_Dp; - } - } - else if (className[0] == 'R') - { - if (strcmp(className, "Rdm") == 0) - { - return InstructionSet_Rdm; - } - } - else if (className[0] == 'S') - { - if (strcmp(className, "Sha1") == 0) - { - return InstructionSet_Sha1; - } - 
if (strcmp(className, "Sha256") == 0) - { - return InstructionSet_Sha256; - } - if (strcmp(className, "Sve2") == 0) - { - return InstructionSet_Sve2; - } - if (strcmp(className, "Sve") == 0) - { - return InstructionSet_Sve; - } - } - else if (className[0] == 'V') - { - if (strncmp(className, "Vector64", 8) == 0) - { - return InstructionSet_Vector64; - } - else if (strncmp(className, "Vector128", 9) == 0) - { - return InstructionSet_Vector128; - } - } - - return InstructionSet_ILLEGAL; -} - -//------------------------------------------------------------------------ -// lookupIsa: Gets the InstructionSet for a given class name and enclsoing class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// innerEnclosingClassName -- The name of the inner enclosing class or nullptr if one doesn't exist -// outerEnclosingClassName -- The name of the outer enclosing class or nullptr if one doesn't exist -// -// Return Value: -// The InstructionSet associated with className and enclosingClassName -// -CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName) -{ - assert(className != nullptr); - - if (innerEnclosingClassName == nullptr) - { - // No nested class is the most common, so fast path it - return lookupInstructionSet(className); - } - - // Since lookupId is only called for the xplat intrinsics - // or intrinsics in the platform specific namespace, we assume - // that it will be one we can handle and don't try to early out. 
- - CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); - - if (strcmp(className, "Arm64") == 0) - { - return Arm64VersionOfIsa(enclosingIsa); - } - - return InstructionSet_ILLEGAL; -} - //------------------------------------------------------------------------ // lookupIval: Gets a the implicit immediate value for the given intrinsic // diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 19047c2e5bb68a..a788f8845ba907 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -849,6 +849,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + case NI_AVX512_CompressMask: case NI_AVX512_ExpandMask: { @@ -873,12 +884,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) default: { - assert(intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512); - assert(targetReg != REG_NA); - assert(op1Reg != REG_NA); - assert(op2Reg != REG_NA); - - genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); + unreached(); break; }; } @@ -983,6 +989,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX512: case InstructionSet_AVX512_X64: case InstructionSet_AVX512v2: + case InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: { genAvxFamilyIntrinsic(node, instOptions); break; @@ -3450,6 +3458,176 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } + case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: + { + GenTree* op2 = node->Op(2); + 
GenTree* op3 = node->Op(3); + + op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + var_types op3Type = node->GetAuxiliaryType(); + switch (baseType) + { + case TYP_UBYTE: + { + ins = INS_vpdpbuuds; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + ins = INS_vpdpbsuds; + break; + } + + case TYP_BYTE: + { + ins = INS_vpdpbssds; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwsuds; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + ins = INS_vpdpwuuds; + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwusds; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } + + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + + case NI_AVXVNNIINT_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + { + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + var_types op3Type = node->GetAuxiliaryType(); + switch (baseType) + { + case TYP_UBYTE: + { + ins = INS_vpdpbuud; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + ins = INS_vpdpbsud; + break; + } + + case TYP_BYTE: + { + ins = INS_vpdpbssd; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwsud; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + ins = INS_vpdpwuud; + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwusd; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } + + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, 
op3, instOptions); + break; + } + default: unreached(); break; diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index b00522e52f098a..e8413afa41ea23 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1079,48 +1079,24 @@ HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVXVNNIINT16 Intrinsics +// AVXVNNIINT Intrinsics #define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_vpdpwsud, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_vpdpwsuds, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +#define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVXVNNIINT16 Intrinsics +// AVXVNNIINT_V512 Intrinsics #define FIRST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_vpdpbssd, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddByteByte, -1, 3, {INS_invalid, INS_vpdpbuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddByteByteSaturate, -1, 3, {INS_invalid, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_vpdpwsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteByte, -1, 3, {INS_vpdpbsud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteByteSaturate, -1, 3, {INS_vpdpbsuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteSByte, -1, 3, {INS_vpdpbssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSByteSByteSaturate, -1, 3, {INS_vpdpbssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_vpdpbssds, INS_vpdpbuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16Int16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16Int16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwusds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16UInt16, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddUInt16UInt16Saturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vpdpwuuds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +#define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 7a9dcbed96eb00..aa799c1d731037 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -5,462 +5,6 @@ #include "hwintrinsic.h" #ifdef FEATURE_HW_INTRINSICS - -//------------------------------------------------------------------------ -// X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The 64-bit only InstructionSet associated with isa -static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_X86Base: - return InstructionSet_X86Base_X64; - case InstructionSet_SSE42: - return InstructionSet_SSE42_X64; - case InstructionSet_AVX: - return InstructionSet_AVX_X64; - case InstructionSet_AVX2: - return InstructionSet_AVX2_X64; - case InstructionSet_AVX512: - return InstructionSet_AVX512_X64; - case InstructionSet_AVX512v2: - return InstructionSet_AVX512v2_X64; - case InstructionSet_AVX512v3: - return InstructionSet_AVX512v3_X64; - case InstructionSet_AVX10v1: - return InstructionSet_AVX10v1_X64; - case InstructionSet_AVX10v2: - return InstructionSet_AVX10v2_X64; - case InstructionSet_AES: - return InstructionSet_AES_X64; - case InstructionSet_AVX512VP2INTERSECT: - return InstructionSet_AVX512VP2INTERSECT_X64; - case InstructionSet_AVXIFMA: - return InstructionSet_AVXIFMA_X64; - 
case InstructionSet_AVXVNNI: - return InstructionSet_AVXVNNI_X64; - case InstructionSet_AVXVNNIINT: - return InstructionSet_AVXVNNIINT_X64; - case InstructionSet_AVXVNNIINT_V512: - return InstructionSet_AVXVNNIINT_V512_X64; - case InstructionSet_GFNI: - return InstructionSet_GFNI_X64; - case InstructionSet_SHA: - return InstructionSet_SHA_X64; - case InstructionSet_WAITPKG: - return InstructionSet_WAITPKG_X64; - case InstructionSet_X86Serialize: - return InstructionSet_X86Serialize_X64; - default: - return InstructionSet_NONE; - } -} - -//------------------------------------------------------------------------ -// VLVersionOfIsa: Gets the corresponding AVX512VL only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The AVX512VL only InstructionSet associated with isa -static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AVX512: - case InstructionSet_AVX512v2: - case InstructionSet_AVX512v3: - case InstructionSet_AVX10v1: - { - // These nested ISAs aren't tracked by the JIT support - return isa; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -//------------------------------------------------------------------------ -// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The V256 only InstructionSet associated with isa -static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AES: - { - return InstructionSet_AES_V256; - } - - case InstructionSet_GFNI: - { - return InstructionSet_GFNI_V256; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -//------------------------------------------------------------------------ -// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet -// -// Arguments: 
-// isa -- The InstructionSet ID -// -// Return Value: -// The V512 only InstructionSet associated with isa -static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v2: - case InstructionSet_AVX10v2_X64: - { - // These nested ISAs aren't tracked by the JIT support - return isa; - } - - case InstructionSet_AES: - { - return InstructionSet_AES_V512; - } - - case InstructionSet_GFNI: - { - return InstructionSet_GFNI_V512; - } - - case InstructionSet_AVXVNNIINT: - case InstructionSet_AVXVNNIINT_V512: - { - return InstructionSet_AVXVNNIINT_V512; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -//------------------------------------------------------------------------ -// lookupInstructionSet: Gets the InstructionSet for a given class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// -// Return Value: -// The InstructionSet associated with className -static CORINFO_InstructionSet lookupInstructionSet(const char* className) -{ - assert(className != nullptr); - - if (className[0] == 'A') - { - if (strcmp(className + 1, "es") == 0) - { - return InstructionSet_AES; - } - else if (strncmp(className + 1, "vx", 2) == 0) - { - if (className[3] == '\0') - { - return InstructionSet_AVX; - } - else if (strncmp(className + 3, "10v", 3) == 0) - { - if (strcmp(className + 6, "1") == 0) - { - return InstructionSet_AVX10v1; - } - else if (strcmp(className + 6, "2") == 0) - { - return InstructionSet_AVX10v2; - } - } - else if (strcmp(className + 3, "2") == 0) - { - return InstructionSet_AVX2; - } - else if (strncmp(className + 3, "512", 3) == 0) - { - if (className[6] == 'B') - { - if (strcmp(className + 7, "italg") == 0) - { - return InstructionSet_AVX512v3; - } - else if (strcmp(className + 7, "f16") == 0) - { - return InstructionSet_AVX10v1; - } - else if (strcmp(className + 
7, "W") == 0) - { - return InstructionSet_AVX512; - } - } - else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0)) - { - return InstructionSet_AVX512; - } - else if (className[6] == 'F') - { - if (className[7] == '\0') - { - return InstructionSet_AVX512; - } - else if (strcmp(className + 7, "p16") == 0) - { - return InstructionSet_AVX10v1; - } - } - else if (className[6] == 'V') - { - if (strncmp(className + 7, "bmi", 3) == 0) - { - if (className[10] == '\0') - { - return InstructionSet_AVX512v2; - } - else if (strcmp(className + 10, "2") == 0) - { - return InstructionSet_AVX512v3; - } - } - else if (className[7] == 'p') - { - if (strcmp(className + 8, "p2intersect") == 0) - { - return InstructionSet_AVX512VP2INTERSECT; - } - else if (strcmp(className + 8, "opcntdq") == 0) - { - return InstructionSet_AVX512v3; - } - } - } - } - else if (strcmp(className + 3, "Ifma") == 0) - { - return InstructionSet_AVXIFMA; - } - else if (strncmp(className + 3, "Vnni", 4) == 0) - { - if (className[7] == '\0') - { - return InstructionSet_AVXVNNI; - } - else if (strncmp(className + 7, "Int", 3) == 0) - { - if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) - { - return InstructionSet_AVXVNNIINT; - } - } - } - } - } - else if (className[0] == 'B') - { - if (strncmp(className + 1, "mi", 2) == 0) - { - if (strcmp(className + 3, "1") == 0) - { - return InstructionSet_AVX2; - } - else if (strcmp(className + 3, "2") == 0) - { - return InstructionSet_AVX2; - } - } - } - else if (className[0] == 'F') - { - if (strcmp(className + 1, "ma") == 0) - { - return InstructionSet_AVX2; - } - else if (strcmp(className + 1, "16c") == 0) - { - return InstructionSet_AVX2; - } - } - else if (className[0] == 'G') - { - if (strcmp(className + 1, "fni") == 0) - { - return InstructionSet_GFNI; - } - } - else if (className[0] == 'L') - { - if (strcmp(className + 1, "zcnt") == 0) - { - return InstructionSet_AVX2; - } - } - else if (className[0] == 'P') - 
{ - if (strcmp(className + 1, "clmulqdq") == 0) - { - return InstructionSet_AES; - } - else if (strcmp(className + 1, "opcnt") == 0) - { - return InstructionSet_SSE42; - } - } - else if (className[0] == 'S') - { - if (strcmp(className + 1, "ha") == 0) - { - return InstructionSet_SHA; - } - else if (strncmp(className + 1, "se", 2) == 0) - { - if ((className[3] == '\0') || (strcmp(className + 3, "2") == 0)) - { - return InstructionSet_X86Base; - } - else if (strcmp(className + 3, "3") == 0) - { - return InstructionSet_SSE42; - } - else if (strcmp(className + 3, "41") == 0) - { - return InstructionSet_SSE42; - } - else if (strcmp(className + 3, "42") == 0) - { - return InstructionSet_SSE42; - } - } - else if (strcmp(className + 1, "sse3") == 0) - { - return InstructionSet_SSE42; - } - } - else if (className[0] == 'V') - { - if (strncmp(className + 1, "ector", 5) == 0) - { - if (strncmp(className + 6, "128", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector128; - } - } - else if (strncmp(className + 6, "256", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector256; - } - } - else if (strncmp(className + 6, "512", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector512; - } - } - } - else if (strcmp(className + 1, "L") == 0) - { - assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); - return InstructionSet_ILLEGAL; - } - } - else if (strcmp(className, "WaitPkg") == 0) - { - return InstructionSet_WAITPKG; - } - else if (strncmp(className, "X86", 3) == 0) - { - if (strcmp(className + 3, "Base") == 0) - { - return InstructionSet_X86Base; - } - else if (strcmp(className + 3, "Serialize") == 0) - { - return InstructionSet_X86Serialize; - } - } - - return InstructionSet_ILLEGAL; -} - 
-//------------------------------------------------------------------------ -// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// innerEnclosingClassName -- The name of the inner enclosing class of X64 classes -// outerEnclosingClassName -- The name of the outer enclosing class of X64 classes -// -// Return Value: -// The InstructionSet associated with className and enclosingClassName -CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName) -{ - assert(className != nullptr); - - if (innerEnclosingClassName == nullptr) - { - // No nested class is the most common, so fast path it - return lookupInstructionSet(className); - } - - // Since lookupId is only called for the xplat intrinsics - // or intrinsics in the platform specific namespace, we assume - // that it will be one we can handle and don't try to early out. 
- - CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); - - if (className[0] == 'V') - { - if (strcmp(className, "V256") == 0) - { - return V256VersionOfIsa(enclosingIsa); - } - else if (strcmp(className, "V512") == 0) - { - return V512VersionOfIsa(enclosingIsa); - } - else if (strcmp(className, "VL") == 0) - { - return VLVersionOfIsa(enclosingIsa); - } - } - else if (strcmp(className, "X64") == 0) - { - return X64VersionOfIsa(enclosingIsa); - } - - return InstructionSet_ILLEGAL; -} - //------------------------------------------------------------------------ // lookupImmUpperBound: Gets the upper bound for the imm-value of a given NamedIntrinsic // @@ -5241,6 +4785,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_AVXVNNIINT_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: + { + assert(sig->numArgs == 3); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + CORINFO_CLASS_HANDLE argClass; + var_types argType = TYP_UNKNOWN; + + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + CorInfoType op3BaseJitType = getBaseJitTypeOfSIMDType(argClass); + GenTree* op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op3BaseJitType); + break; + } + case 
NI_AVX2_GatherMaskVector128: case NI_AVX2_GatherMaskVector256: { diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 3f4d926bf8fa7d..7f140e5227c4bc 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1128,7 +1128,7 @@ INST3(vmpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_ INST3(vucomxsd, "ucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags INST3(vucomxss, "ucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags -#define LAST_AVX512_INSTRUCTION INS_vpdpbuuds +#define LAST_AVX512_INSTRUCTION INS_vucomxss // id nm um mr mi rm tt flags #define FIRST_APX_INSTRUCTION INS_ccmpo diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index f1b0231f2d267d..7e31d5f97fd7c0 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2753,30 +2753,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16: - case 
NI_AVXVNNIINT_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_MultiplyWideningAndAddUInt16Int16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByte: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteSByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddByteByteSaturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16UInt16Saturate: - case NI_AVXVNNIINT_V512_MultiplyWideningAndAddUInt16Int16Saturate: + case NI_AVXVNNIINT_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: { assert(numArgs == 3); diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj deleted file mode 100644 index f22223d3c45b2e..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_r.csproj +++ /dev/null @@ -1,20 +0,0 @@ - - - 2 - - true - true - - - - true - true - true - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj deleted file mode 100644 index b1dee63473dc31..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt16_ro.csproj +++ /dev/null @@ -1,20 +0,0 @@ - - - 2 - - true - true - - - - true - true - 
true - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj deleted file mode 100644 index 5f1151b24b7607..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_r.csproj +++ /dev/null @@ -1,20 +0,0 @@ - - - 2 - - true - true - - - - true - true - true - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj deleted file mode 100644 index 612599305d9b32..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_X86_AvxVnniInt8_ro.csproj +++ /dev/null @@ -1,20 +0,0 @@ - - - 2 - - true - true - - - - true - true - true - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16SampleTest.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj similarity index 100% rename from 
src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/AvxVnniInt16_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16/Program.AvxVnniInt16.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj 
b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs 
rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8SampleTest.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/AvxVnniInt8_ro.csproj rename to 
src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8/Program.AvxVnniInt8.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj 
similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs rename to src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props deleted file mode 100644 index e3e1bac79c32c5..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.props +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - true - - - diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets deleted file mode 100644 index 5b046968c0461a..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt16/Directory.Build.targets +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - $(IntermediateOutputPath)$(MSBuildProjectName)/gen/ - $(GeneratedHWIntrinsicTestDirectory)GeneratedHWIntrinsicTestList.txt - - - - - - - - - - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props 
b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props deleted file mode 100644 index e3e1bac79c32c5..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.props +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - true - - - diff --git a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets b/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets deleted file mode 100644 index 5b046968c0461a..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/X86_AvxVnniInt8/Directory.Build.targets +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - $(IntermediateOutputPath)$(MSBuildProjectName)/gen/ - $(GeneratedHWIntrinsicTestDirectory)GeneratedHWIntrinsicTestList.txt - - - - - - - - - - - - - - - - - diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 255f8cce9c7068..f5c24549205a28 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -64,8 +64,8 @@ static int Main() bool? ExpectedAvx2 = false; bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; - bool? ExpectedAvxVnniInt8 = false; - bool? ExpectedAvxVnniInt16 = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedAvx10v2 = false; bool? ExpectedF16c = false; bool? ExpectedFma = false; @@ -109,8 +109,8 @@ static int Main() bool? ExpectedAvx2 = false; bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; - bool? ExpectedAvxVnniInt8 = false; - bool? ExpectedAvxVnniInt16 = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedAvx10v2 = false; bool? ExpectedF16c = false; bool? ExpectedFma = false; @@ -154,9 +154,7 @@ static int Main() bool? ExpectedPclmulqdq = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; - bool? ExpectedAvxVnniInt8 = null; - bool? 
ExpectedAvxVnniInt16 = null; - bool? ExpectedAvx10v2 = false; + bool? ExpectedAvxVnniInt = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedSha = null; @@ -181,6 +179,8 @@ static int Main() bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; + bool? ExpectedAvxVnniIntV512 = false; + bool? ExpectedAvx10v2 = false; #elif AVX_INTRINSICS_NO_AVX2 bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -189,9 +189,6 @@ static int Main() bool? ExpectedPopcnt = true; bool? ExpectedAvx = true; - bool? ExpectedAvxVnniInt8 = false; - bool? ExpectedAvxVnniInt16 = false; - bool? ExpectedAvx10v2 = false; bool? ExpectedAes = null; bool? ExpectedPclmulqdq = null; bool? ExpectedGfni = null; @@ -226,6 +223,9 @@ static int Main() bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; + bool? ExpectedAvxVnniIntV512 = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvx10v2 = false; #elif AVX2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -244,9 +244,7 @@ static int Main() bool? ExpectedPclmulqdq = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; - bool? ExpectedAvxVnniInt8 = null; - bool? ExpectedAvxVnniInt16 = null; - bool? ExpectedAvx10v2 = false; + bool? ExpectedAvxVnniInt = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedSha = null; @@ -271,6 +269,8 @@ static int Main() bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; + bool? ExpectedAvxVnniIntV512 = false; + bool? ExpectedAvx10v2 = false; #elif AVX512_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -283,9 +283,6 @@ static int Main() bool? ExpectedBmi2 = true; bool? ExpectedF16c = true; bool? ExpectedFma = true; - bool? ExpectedAvxVnniInt8 = null; - bool? ExpectedAvxVnniInt16 = null; - bool? ExpectedAvx10v2 = null; bool? ExpectedLzcnt = true; bool? 
ExpectedAvx512F = true; bool? ExpectedAvx512BW = true; @@ -298,6 +295,7 @@ static int Main() bool? ExpectedAvx512Vpopcntdq = null; bool? ExpectedAvx512Bf16 = null; bool? ExpectedAvx512Fp16 = null; + bool? ExpectedAvxVnniInt = null; bool? ExpectedAvx10v1 = null; bool? ExpectedAvx10v1V512 = null; bool? ExpectedAes = null; @@ -315,6 +313,9 @@ static int Main() bool? ExpectedPclmulqdqV512 = null; bool? ExpectedWaitPkg = null; bool? ExpectedX86Serialize = null; + + bool? ExpectedAvxVnniIntV512 = false; + bool? ExpectedAvx10v2 = false; #elif AVX10v2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -327,23 +328,22 @@ static int Main() bool? ExpectedBmi2 = true; bool? ExpectedF16c = true; bool? ExpectedFma = true; - bool? ExpectedAvxVnniInt8 = true; - bool? ExpectedAvxVnniInt16 = true; + bool? ExpectedAvxVnniIntV512 = true; bool? ExpectedAvx10v2 = true; bool? ExpectedLzcnt = true; bool? ExpectedAvx512F = true; bool? ExpectedAvx512BW = true; bool? ExpectedAvx512CD = true; bool? ExpectedAvx512DQ = true; - - bool? ExpectedAvx512Vbmi = null; - bool? ExpectedAvx512Bitalg = null; - bool? ExpectedAvx512Vbmi2 = null; - bool? ExpectedAvx512Vpopcntdq = null; - bool? ExpectedAvx512Bf16 = null; - bool? ExpectedAvx512Fp16 = null; + bool? ExpectedAvx512Vbmi = true; + bool? ExpectedAvx512Bitalg = true; + bool? ExpectedAvx512Vbmi2 = true; + bool? ExpectedAvx512Vpopcntdq = true; + bool? ExpectedAvx512Bf16 = true; + bool? ExpectedAvx512Fp16 = true; bool? ExpectedAvx10v1 = true; bool? ExpectedAvx10v1V512 = true; + bool? ExpectedAes = null; bool? ExpectedPclmulqdq = null; bool? ExpectedAvx512Vp2intersect = null; @@ -359,6 +359,7 @@ static int Main() bool? ExpectedPclmulqdqV512 = null; bool? ExpectedWaitPkg = null; bool? ExpectedX86Serialize = null; + bool? ExpectedAvxVnniInt = null; #else #error Who dis? 
#endif @@ -428,11 +429,11 @@ static int Main() Check("Lzcnt", ExpectedLzcnt, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32); Check("Lzcnt.X64", ExpectedLzcnt, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); - Check("AvxVnniInt8", ExpectedAvxVnniInt8, &AvxVnniInt8IsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("AvxVnniInt8.X64", ExpectedAvxVnniInt8, &AvxVnniInt8X64IsSupported, AvxVnniInt8.X64.IsSupported, null); + Check("AvxVnniInt", ExpectedAvxVnniInt, &AvxVnniIntIsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + // Check("AvxVnniInt.X64", ExpectedAvxVnniInt, &AvxVnniIntX64IsSupported, AvxVnniInt8.X64.IsSupported, null); - Check("AvxVnniInt16", ExpectedAvxVnniInt16, &AvxVnniInt16IsSupported, AvxVnniInt16.IsSupported, () => AvxVnniInt16.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("AvxVnniInt16.X64", ExpectedAvxVnniInt16, &AvxVnniInt16X64IsSupported, AvxVnniInt16.X64.IsSupported, null); + Check("AvxVnniIntV512", ExpectedAvxVnniIntV512, &AvxVnniIntV512IsSupported, AvxVnniInt16.V512.IsSupported, () => AvxVnniInt16.V512.MultiplyWideningAndAdd(Vector512.Zero, Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); + // Check("AvxVnniIntV512.X64", ExpectedAvxVnniIntV512, &AvxVnniIntV512X64IsSupported, AvxVnniInt16.V512.X64.IsSupported, null); Check("Avx10v2", ExpectedAvx10v2, &Avx10v2IsSupported, Avx10v2.IsSupported, () => Avx10v2.MinMax(Vector128.Zero, Vector128.Zero, 0x00).Equals(Vector128.Zero)); Check("Avx10v2.X64", ExpectedAvx10v2, &Avx10v2X64IsSupported, Avx10v2.X64.IsSupported, null); @@ -632,10 +633,10 @@ static int Main() static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => 
AvxVnni.X64.IsSupported; - static bool AvxVnniInt8IsSupported() => AvxVnniInt8.IsSupported; - static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; - static bool AvxVnniInt16IsSupported() => AvxVnniInt16.IsSupported; - static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; + static bool AvxVnniIntIsSupported() => AvxVnniInt8.IsSupported; + // static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; + static bool AvxVnniIntV512IsSupported() => AvxVnniInt16.V512.IsSupported; + // static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; static bool GfniIsSupported() => Gfni.IsSupported; static bool GfniV256IsSupported() => Gfni.V256.IsSupported; From 039fecad006de55344d40b124957f2b04c51010b Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 16 Jun 2025 15:30:23 -0700 Subject: [PATCH 26/32] Remove x64 IS for AVXVNNIINT and refactor code --- src/coreclr/inc/corinfoinstructionset.h | 32 +----- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/jit/compiler.cpp | 103 ++++++++---------- src/coreclr/jit/emitxarch.cpp | 17 +-- src/coreclr/jit/emitxarch.h | 3 +- .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 - .../JitInterface/CorInfoInstructionSet.cs | 52 +-------- .../ThunkGenerator/InstructionSetDesc.txt | 2 - 8 files changed, 67 insertions(+), 156 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 8e7e20341b6cba..9d2f2aa9c482f6 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -94,11 +94,9 @@ enum CORINFO_InstructionSet InstructionSet_AVXIFMA_X64=42, InstructionSet_AVXVNNI_X64=43, InstructionSet_GFNI_X64=44, - InstructionSet_AVXVNNIINT_X64=45, - InstructionSet_AVXVNNIINT_V512_X64=46, - InstructionSet_SHA_X64=47, - InstructionSet_WAITPKG_X64=48, - InstructionSet_X86Serialize_X64=49, + InstructionSet_SHA_X64=45, + InstructionSet_WAITPKG_X64=46, + InstructionSet_X86Serialize_X64=47, 
#endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -145,11 +143,9 @@ enum CORINFO_InstructionSet InstructionSet_AVXIFMA_X64=42, InstructionSet_AVXVNNI_X64=43, InstructionSet_GFNI_X64=44, - InstructionSet_AVXVNNIINT_X64=45, - InstructionSet_AVXVNNIINT_V512_X64=46, - InstructionSet_SHA_X64=47, - InstructionSet_WAITPKG_X64=48, - InstructionSet_X86Serialize_X64=49, + InstructionSet_SHA_X64=45, + InstructionSet_WAITPKG_X64=46, + InstructionSet_X86Serialize_X64=47, #endif // TARGET_X86 }; @@ -303,10 +299,6 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_WAITPKG_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); - if (HasInstructionSet(InstructionSet_AVXVNNIINT)) - AddInstructionSet(InstructionSet_AVXVNNIINT_X64); - if (HasInstructionSet(InstructionSet_AVXVNNIINT_V512)) - AddInstructionSet(InstructionSet_AVXVNNIINT_V512_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -467,14 +459,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); - if 
(resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE42); if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) @@ -777,12 +761,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT512"; case InstructionSet_AVXVNNIINT : return "AVXVNNIINT"; - case InstructionSet_AVXVNNIINT_X64 : - return "AVXVNNIINT_X64"; case InstructionSet_AVXVNNIINT_V512 : return "AVXVNNIINT_V512"; - case InstructionSet_AVXVNNIINT_V512_X64 : - return "AVXVNNIINT_V512_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 312a8cfd871973..43c4c32332df8d 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* a1f8d16e-478c-4c5b-a467-f14a35b48c05 */ - 0xa1f8d16e, - 0x478c, - 0x4c5b, - {0xa4, 0x67, 0xf1, 0x4a, 0x35, 0xb4, 0x8c, 0x05} +constexpr GUID JITEEVersionIdentifier = { /* 5c7eb9f1-a9cb-4a35-aea6-ae93d1f54c56 */ + 0x5c7eb9f1, + 0xa9cb, + 0x4a35, + {0xae, 0xa6, 0xae, 0x93, 0xd1, 0xf5, 0x4c, 0x56} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ddda783b0107d5..82fcdde54c453c 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -501,9 +501,9 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; case InstructionSet_AVXVNNIINT: - return InstructionSet_AVXVNNIINT_X64; + return 
InstructionSet_AVXVNNIINT; case InstructionSet_AVXVNNIINT_V512: - return InstructionSet_AVXVNNIINT_V512_X64; + return InstructionSet_AVXVNNIINT_V512; case InstructionSet_GFNI: return InstructionSet_GFNI_X64; case InstructionSet_SHA: @@ -618,6 +618,45 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) } } +#else // TARGET_ARM64 +//------------------------------------------------------------------------ +// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The 64-bit only InstructionSet associated with isa +static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AdvSimd: + return InstructionSet_AdvSimd_Arm64; + case InstructionSet_Aes: + return InstructionSet_Aes_Arm64; + case InstructionSet_ArmBase: + return InstructionSet_ArmBase_Arm64; + case InstructionSet_Crc32: + return InstructionSet_Crc32_Arm64; + case InstructionSet_Dp: + return InstructionSet_Dp_Arm64; + case InstructionSet_Sha1: + return InstructionSet_Sha1_Arm64; + case InstructionSet_Sha256: + return InstructionSet_Sha256_Arm64; + case InstructionSet_Rdm: + return InstructionSet_Rdm_Arm64; + case InstructionSet_Sve: + return InstructionSet_Sve_Arm64; + case InstructionSet_Sve2: + return InstructionSet_Sve2_Arm64; + default: + return InstructionSet_NONE; + } +} +#endif // TARGET_XARCH + //------------------------------------------------------------------------ // lookupInstructionSet: Gets the InstructionSet for a given class name // @@ -629,7 +668,7 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) { assert(className != nullptr); - +#ifdef TARGET_XARCH if (className[0] == 'A') { if (strcmp(className + 1, "es") == 0) @@ -869,60 +908,7 @@ CORINFO_InstructionSet 
Compiler::lookupInstructionSet(const char* className) return InstructionSet_X86Serialize; } } - - return InstructionSet_ILLEGAL; -} - -#else // TARGET_ARM64 -//------------------------------------------------------------------------ -// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The 64-bit only InstructionSet associated with isa -static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AdvSimd: - return InstructionSet_AdvSimd_Arm64; - case InstructionSet_Aes: - return InstructionSet_Aes_Arm64; - case InstructionSet_ArmBase: - return InstructionSet_ArmBase_Arm64; - case InstructionSet_Crc32: - return InstructionSet_Crc32_Arm64; - case InstructionSet_Dp: - return InstructionSet_Dp_Arm64; - case InstructionSet_Sha1: - return InstructionSet_Sha1_Arm64; - case InstructionSet_Sha256: - return InstructionSet_Sha256_Arm64; - case InstructionSet_Rdm: - return InstructionSet_Rdm_Arm64; - case InstructionSet_Sve: - return InstructionSet_Sve_Arm64; - case InstructionSet_Sve2: - return InstructionSet_Sve2_Arm64; - default: - return InstructionSet_NONE; - } -} - -//------------------------------------------------------------------------ -// lookupInstructionSet: Gets the InstructionSet for a given class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// -// Return Value: -// The InstructionSet associated with className -CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) -{ - assert(className != nullptr); - +#else // TARGET_AMD64 if (className[0] == 'A') { if (strcmp(className, "AdvSimd") == 0) @@ -989,10 +975,9 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) return InstructionSet_Vector128; } } - +#endif // TARGET_XARCH return InstructionSet_ILLEGAL; } -#endif // 
TARGET_XARCH //------------------------------------------------------------------------ // lookupIsa: Gets the InstructionSet for a given class name and enclosing class name diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 45d7c1ccadd2cc..162d7f73167b5b 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -92,19 +92,13 @@ bool emitter::IsApxOnlyInstruction(instruction ins) bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) { - return (ins >= FIRST_AVXVNNI_INSTRUCTION && ins <= LAST_AVXVNNI_INSTRUCTION) || - (ins >= FIRST_AVXVNNIINT8_INSTRUCTION && ins <= LAST_AVXVNNIINT8_INSTRUCTION) || - (ins >= FIRST_AVXVNNIINT16_INSTRUCTION && ins <= LAST_AVXVNNIINT16_INSTRUCTION); + return (ins >= FIRST_AVXVNNI_INSTRUCTION && ins <= LAST_AVXVNNI_INSTRUCTION) || IsAVXVNNIINTInstruction(ins); } -bool emitter::IsAVXVNNIINT8Instruction(instruction ins) +bool emitter::IsAVXVNNIINTInstruction(instruction ins) { - return (ins >= FIRST_AVXVNNIINT8_INSTRUCTION) && (ins <= LAST_AVXVNNIINT8_INSTRUCTION); -} - -bool emitter::IsAVXVNNIINT16Instruction(instruction ins) -{ - return (ins >= FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= LAST_AVXVNNIINT16_INSTRUCTION); + return (ins >= FIRST_AVXVNNIINT8_INSTRUCTION && ins <= LAST_AVXVNNIINT8_INSTRUCTION) || + (ins >= FIRST_AVXVNNIINT16_INSTRUCTION && ins <= LAST_AVXVNNIINT16_INSTRUCTION); } bool emitter::Is3OpRmwInstruction(instruction ins) @@ -18220,8 +18214,7 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI case INS_TT_FULL: { instruction ins = id->idIns(); - assert((inputSize == 4 || inputSize == 8) || IsAVXVNNIINT8Instruction(ins) || - IsAVXVNNIINT16Instruction(ins)); + assert((inputSize == 4 || inputSize == 8) || IsAVXVNNIINTInstruction(ins)); if (HasEmbeddedBroadcast(id)) { // N = input size in bytes diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 6970afcc136e7c..e4358ada250d35 100644 --- 
a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -122,8 +122,7 @@ static bool IsAVXOnlyInstruction(instruction ins); static bool IsAvx512OnlyInstruction(instruction ins); static bool IsKMOVInstruction(instruction ins); static bool IsAVXVNNIFamilyInstruction(instruction ins); -static bool IsAVXVNNIINT8Instruction(instruction ins); -static bool IsAVXVNNIINT16Instruction(instruction ins); +static bool IsAVXVNNIINTInstruction(instruction ins); static bool Is3OpRmwInstruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index d320e21a546f75..ef3c8b4d7e6b65 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -117,9 +117,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; case InstructionSet.X64_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; - case InstructionSet.X64_AVXVNNIINT_X64: return ReadyToRunInstructionSet.AvxVnniInt8; case InstructionSet.X64_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; - case InstructionSet.X64_AVXVNNIINT_V512_X64: return ReadyToRunInstructionSet.AvxVnniInt8_V512; default: throw new Exception("Unknown instruction set"); } @@ -175,9 +173,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; case InstructionSet.X86_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; - case InstructionSet.X86_AVXVNNIINT_X64: return null; case 
InstructionSet.X86_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; - case InstructionSet.X86_AVXVNNIINT_V512_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 4cc3d67528e22f..0e3e406f989c8b 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -90,8 +90,6 @@ public enum InstructionSet X64_AVXIFMA_X64 = InstructionSet_X64.AVXIFMA_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, - X64_AVXVNNIINT_X64 = InstructionSet_X64.AVXVNNIINT_X64, - X64_AVXVNNIINT_V512_X64 = InstructionSet_X64.AVXVNNIINT_V512_X64, X64_SHA_X64 = InstructionSet_X64.SHA_X64, X64_WAITPKG_X64 = InstructionSet_X64.WAITPKG_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, @@ -139,8 +137,6 @@ public enum InstructionSet X86_AVXIFMA_X64 = InstructionSet_X86.AVXIFMA_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, - X86_AVXVNNIINT_X64 = InstructionSet_X86.AVXVNNIINT_X64, - X86_AVXVNNIINT_V512_X64 = InstructionSet_X86.AVXVNNIINT_V512_X64, X86_SHA_X64 = InstructionSet_X86.SHA_X64, X86_WAITPKG_X64 = InstructionSet_X86.WAITPKG_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, @@ -235,11 +231,9 @@ public enum InstructionSet_X64 AVXIFMA_X64 = 42, AVXVNNI_X64 = 43, GFNI_X64 = 44, - AVXVNNIINT_X64 = 45, - AVXVNNIINT_V512_X64 = 46, - SHA_X64 = 47, - WAITPKG_X64 = 48, - X86Serialize_X64 = 49, + SHA_X64 = 45, + WAITPKG_X64 = 46, + X86Serialize_X64 = 47, } public enum InstructionSet_X86 @@ -290,11 +284,9 @@ public enum InstructionSet_X86 AVXIFMA_X64 = 42, AVXVNNI_X64 = 43, GFNI_X64 = 44, - AVXVNNIINT_X64 = 45, - AVXVNNIINT_V512_X64 = 46, - SHA_X64 = 47, - WAITPKG_X64 = 48, - X86Serialize_X64 = 49, + SHA_X64 = 
45, + WAITPKG_X64 = 46, + X86Serialize_X64 = 47, } public unsafe struct InstructionSetFlags : IEnumerable @@ -597,14 +589,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) @@ -847,10 +831,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE42); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) @@ -1276,10 +1256,6 @@ public void Set64BitInstructionSetVariants(TargetArchitecture 
architecture) AddInstructionSet(InstructionSet.X64_WAITPKG_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) - AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); - if (HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) - AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); break; case TargetArchitecture.X86: @@ -1326,8 +1302,6 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_SHA_X64); AddInstructionSet(InstructionSet.X64_WAITPKG_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - AddInstructionSet(InstructionSet.X64_AVXVNNIINT_X64); - AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512_X64); break; case TargetArchitecture.X86: @@ -1348,8 +1322,6 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_SHA_X64); AddInstructionSet(InstructionSet.X86_WAITPKG_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); - AddInstructionSet(InstructionSet.X86_AVXVNNIINT_X64); - AddInstructionSet(InstructionSet.X86_AVXVNNIINT_V512_X64); break; } } @@ -1800,24 +1772,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_X86Serialize; } case "AvxVnniInt8": - if (nestedTypeName == "X64") - { return InstructionSet.X64_AVXVNNIINT_X64; } - else - if (nestedTypeName == "V512_X64") - { return InstructionSet.X64_AVXVNNIINT_V512_X64; } - else if (nestedTypeName == "V512") { return InstructionSet.X64_AVXVNNIINT_V512; } else { return InstructionSet.X64_AVXVNNIINT; } case "AvxVnniInt16": - if (nestedTypeName == "X64") - { return InstructionSet.X64_AVXVNNIINT_X64; } - else - if (nestedTypeName == "V512_X64") - { return InstructionSet.X64_AVXVNNIINT_V512_X64; } - else if (nestedTypeName == "V512") { return InstructionSet.X64_AVXVNNIINT_V512; } 
else diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 5e59f64ce6d0e4..24c3f474ab14de 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -141,8 +141,6 @@ instructionset64bit,X86 ,AVXIFMA instructionset64bit,X86 ,AVXVNNI instructionset64bit,X86 ,GFNI -instructionset64bit,X86 ,AVXVNNIINT -instructionset64bit,X86 ,AVXVNNIINT_V512 instructionset64bit,X86 ,SHA instructionset64bit,X86 ,WAITPKG instructionset64bit,X86 ,X86Serialize From e480bdc3828f77526f29b1acb854aadd0cef0539 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 16 Jun 2025 21:21:45 -0700 Subject: [PATCH 27/32] Remove X64 ISas for AVXVNNIINT --- src/coreclr/jit/hwintrinsic.cpp | 2 -- src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 9e9d369615969b..da215aa35c8d3f 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -947,8 +947,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // AVXIFMA_X64 { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 { NI_Illegal, NI_Illegal }, // GFNI_X64 - { NI_Illegal, NI_Illegal }, // AVXVNNIINT_X64 - { NI_Illegal, NI_Illegal }, // AVXVNNIINT_V512_X64 { NI_Illegal, NI_Illegal }, // SHA_X64 { NI_Illegal, NI_Illegal }, // WAITPKG_X64 { NI_Illegal, NI_Illegal }, // X86Serialize_X64 diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index f1d836d6c3e12e..9c8da6eac983f3 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -102,7 +102,6 @@ public static void 
AddToBuilder(InstructionSetSupportBuilder builder, int flags) if ((flags & Avx10v2) != 0) { builder.AddSupportedInstructionSet("avx10v2"); - builder.AddSupportedInstructionSet("avxvnniint"); builder.AddSupportedInstructionSet("avxvnniint_v512"); } if ((flags & AvxVnniInt) != 0) @@ -196,9 +195,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_GFNI_V256 => (Gfni | Avx), InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), InstructionSet.X64_AVXVNNIINT => AvxVnniInt, - InstructionSet.X64_AVXVNNIINT_X64 => AvxVnniInt, InstructionSet.X64_AVXVNNIINT_V512 => Avx10v2, - InstructionSet.X64_AVXVNNIINT_V512_X64 => Avx10v2, InstructionSet.X64_SHA => Sha, InstructionSet.X64_SHA_X64 => Sha, From 417fca0e6c2ccbd46275b6632abb18e7cc179419 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Sun, 22 Jun 2025 16:40:13 -0700 Subject: [PATCH 28/32] Update src/native/minipal/cpufeatures.c Co-authored-by: Tanner Gooding --- src/native/minipal/cpufeatures.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 18a7a0e9b4c9a1..9122f8411c9b52 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -385,12 +385,10 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } - if ((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) // AVX-VNNI-INT8 + if (((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) && // AVX-VNNI-INT8 + ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0)) // AVX-VNNI-INT16 { - if ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0) - { - result |= XArchIntrinsicConstants_AvxVnniInt; // AVX-VNNI-INT16 - } + result |= XArchIntrinsicConstants_AvxVnniInt; } if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0) // AVX-IFMA From 403899317b68026a5e9f4f3edb2fb6f1c61c2004 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 23 Jun 2025 20:51:33 -0700 Subject: [PATCH 29/32] Refactoring --- src/coreclr/jit/compiler.cpp | 578 ------------------ 
src/coreclr/jit/hwintrinsicarm64.cpp | 157 +++++ src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 22 +- src/coreclr/jit/hwintrinsicxarch.cpp | 462 ++++++++++++++ .../SmokeTests/HardwareIntrinsics/Program.cs | 6 +- 5 files changed, 631 insertions(+), 594 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d2d3b13b6722fd..29bdd306e7cd2c 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -461,584 +461,6 @@ Compiler::Compiler(ArenaAllocator* arena, info.compUsesAsyncContinuation = false; } -#ifdef FEATURE_HW_INTRINSICS -#if defined(TARGET_XARCH) -//------------------------------------------------------------------------ -// X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The 64-bit only InstructionSet associated with isa -static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_X86Base: - return InstructionSet_X86Base_X64; - case InstructionSet_SSE42: - return InstructionSet_SSE42_X64; - case InstructionSet_AVX: - return InstructionSet_AVX_X64; - case InstructionSet_AVX2: - return InstructionSet_AVX2_X64; - case InstructionSet_AVX512: - return InstructionSet_AVX512_X64; - case InstructionSet_AVX512v2: - return InstructionSet_AVX512v2_X64; - case InstructionSet_AVX512v3: - return InstructionSet_AVX512v3_X64; - case InstructionSet_AVX10v1: - return InstructionSet_AVX10v1_X64; - case InstructionSet_AVX10v2: - return InstructionSet_AVX10v2_X64; - case InstructionSet_AES: - return InstructionSet_AES_X64; - case InstructionSet_AVX512VP2INTERSECT: - return InstructionSet_AVX512VP2INTERSECT_X64; - case InstructionSet_AVXIFMA: - return InstructionSet_AVXIFMA_X64; - case InstructionSet_AVXVNNI: - return InstructionSet_AVXVNNI_X64; - case InstructionSet_AVXVNNIINT: - return InstructionSet_AVXVNNIINT; - case 
InstructionSet_AVXVNNIINT_V512: - return InstructionSet_AVXVNNIINT_V512; - case InstructionSet_GFNI: - return InstructionSet_GFNI_X64; - case InstructionSet_SHA: - return InstructionSet_SHA_X64; - case InstructionSet_WAITPKG: - return InstructionSet_WAITPKG_X64; - case InstructionSet_X86Serialize: - return InstructionSet_X86Serialize_X64; - default: - return InstructionSet_NONE; - } -} - -//------------------------------------------------------------------------ -// VLVersionOfIsa: Gets the corresponding AVX512VL only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The AVX512VL only InstructionSet associated with isa -static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AVX512: - case InstructionSet_AVX512v2: - case InstructionSet_AVX512v3: - case InstructionSet_AVX10v1: - { - // These nested ISAs aren't tracked by the JIT support - return isa; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -//------------------------------------------------------------------------ -// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The V256 only InstructionSet associated with isa -static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AES: - { - return InstructionSet_AES_V256; - } - - case InstructionSet_GFNI: - { - return InstructionSet_GFNI_V256; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -//------------------------------------------------------------------------ -// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The V512 only InstructionSet associated with isa -static CORINFO_InstructionSet 
V512VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v2: - case InstructionSet_AVX10v2_X64: - { - // These nested ISAs aren't tracked by the JIT support - return isa; - } - - case InstructionSet_AES: - { - return InstructionSet_AES_V512; - } - - case InstructionSet_GFNI: - { - return InstructionSet_GFNI_V512; - } - - case InstructionSet_AVXVNNIINT: - case InstructionSet_AVXVNNIINT_V512: - { - return InstructionSet_AVXVNNIINT_V512; - } - - default: - { - return InstructionSet_NONE; - } - } -} - -#else // TARGET_ARM64 -//------------------------------------------------------------------------ -// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet -// -// Arguments: -// isa -- The InstructionSet ID -// -// Return Value: -// The 64-bit only InstructionSet associated with isa -static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_AdvSimd: - return InstructionSet_AdvSimd_Arm64; - case InstructionSet_Aes: - return InstructionSet_Aes_Arm64; - case InstructionSet_ArmBase: - return InstructionSet_ArmBase_Arm64; - case InstructionSet_Crc32: - return InstructionSet_Crc32_Arm64; - case InstructionSet_Dp: - return InstructionSet_Dp_Arm64; - case InstructionSet_Sha1: - return InstructionSet_Sha1_Arm64; - case InstructionSet_Sha256: - return InstructionSet_Sha256_Arm64; - case InstructionSet_Rdm: - return InstructionSet_Rdm_Arm64; - case InstructionSet_Sve: - return InstructionSet_Sve_Arm64; - case InstructionSet_Sve2: - return InstructionSet_Sve2_Arm64; - default: - return InstructionSet_NONE; - } -} -#endif // TARGET_XARCH - -//------------------------------------------------------------------------ -// lookupInstructionSet: Gets the InstructionSet for a given class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to 
lookup -// -// Return Value: -// The InstructionSet associated with className -CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) -{ - assert(className != nullptr); -#ifdef TARGET_XARCH - if (className[0] == 'A') - { - if (strcmp(className + 1, "es") == 0) - { - return InstructionSet_AES; - } - else if (strncmp(className + 1, "vx", 2) == 0) - { - if (className[3] == '\0') - { - return InstructionSet_AVX; - } - else if (strncmp(className + 3, "10v", 3) == 0) - { - if (strcmp(className + 6, "1") == 0) - { - return InstructionSet_AVX10v1; - } - else if (strcmp(className + 6, "2") == 0) - { - return InstructionSet_AVX10v2; - } - } - else if (strcmp(className + 3, "2") == 0) - { - return InstructionSet_AVX2; - } - else if (strncmp(className + 3, "512", 3) == 0) - { - if (className[6] == 'B') - { - if (strcmp(className + 7, "italg") == 0) - { - return InstructionSet_AVX512v3; - } - else if (strcmp(className + 7, "f16") == 0) - { - return InstructionSet_AVX10v1; - } - else if (strcmp(className + 7, "W") == 0) - { - return InstructionSet_AVX512; - } - } - else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0)) - { - return InstructionSet_AVX512; - } - else if (className[6] == 'F') - { - if (className[7] == '\0') - { - return InstructionSet_AVX512; - } - else if (strcmp(className + 7, "p16") == 0) - { - return InstructionSet_AVX10v1; - } - } - else if (className[6] == 'V') - { - if (strncmp(className + 7, "bmi", 3) == 0) - { - if (className[10] == '\0') - { - return InstructionSet_AVX512v2; - } - else if (strcmp(className + 10, "2") == 0) - { - return InstructionSet_AVX512v3; - } - } - else if (className[7] == 'p') - { - if (strcmp(className + 8, "p2intersect") == 0) - { - return InstructionSet_AVX512VP2INTERSECT; - } - else if (strcmp(className + 8, "opcntdq") == 0) - { - return InstructionSet_AVX512v3; - } - } - } - } - else if (strcmp(className + 3, "Ifma") == 0) - { - return InstructionSet_AVXIFMA; - } - else if 
(strncmp(className + 3, "Vnni", 4) == 0) - { - if (className[7] == '\0') - { - return InstructionSet_AVXVNNI; - } - else if (strncmp(className + 7, "Int", 3) == 0) - { - if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) - { - if (compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) - { - return InstructionSet_AVXVNNIINT; - } - else - { - return InstructionSet_AVXVNNIINT_V512; - } - } - } - } - } - } - else if (className[0] == 'B') - { - if (strncmp(className + 1, "mi", 2) == 0) - { - if (strcmp(className + 3, "1") == 0) - { - return InstructionSet_AVX2; - } - else if (strcmp(className + 3, "2") == 0) - { - return InstructionSet_AVX2; - } - } - } - else if (className[0] == 'F') - { - if (strcmp(className + 1, "ma") == 0) - { - return InstructionSet_AVX2; - } - else if (strcmp(className + 1, "16c") == 0) - { - return InstructionSet_AVX2; - } - } - else if (className[0] == 'G') - { - if (strcmp(className + 1, "fni") == 0) - { - return InstructionSet_GFNI; - } - } - else if (className[0] == 'L') - { - if (strcmp(className + 1, "zcnt") == 0) - { - return InstructionSet_AVX2; - } - } - else if (className[0] == 'P') - { - if (strcmp(className + 1, "clmulqdq") == 0) - { - return InstructionSet_AES; - } - else if (strcmp(className + 1, "opcnt") == 0) - { - return InstructionSet_SSE42; - } - } - else if (className[0] == 'S') - { - if (strcmp(className + 1, "ha") == 0) - { - return InstructionSet_SHA; - } - else if (strncmp(className + 1, "se", 2) == 0) - { - if ((className[3] == '\0') || (strcmp(className + 3, "2") == 0)) - { - return InstructionSet_X86Base; - } - else if (strcmp(className + 3, "3") == 0) - { - return InstructionSet_SSE42; - } - else if (strcmp(className + 3, "41") == 0) - { - return InstructionSet_SSE42; - } - else if (strcmp(className + 3, "42") == 0) - { - return InstructionSet_SSE42; - } - } - else if (strcmp(className + 1, "sse3") == 0) - { - return InstructionSet_SSE42; - } - } - else if (className[0] == 'V') - { - if 
(strncmp(className + 1, "ector", 5) == 0) - { - if (strncmp(className + 6, "128", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector128; - } - } - else if (strncmp(className + 6, "256", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector256; - } - } - else if (strncmp(className + 6, "512", 3) == 0) - { - if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) - { - return InstructionSet_Vector512; - } - } - } - else if (strcmp(className + 1, "L") == 0) - { - assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); - return InstructionSet_ILLEGAL; - } - } - else if (strcmp(className, "WaitPkg") == 0) - { - return InstructionSet_WAITPKG; - } - else if (strncmp(className, "X86", 3) == 0) - { - if (strcmp(className + 3, "Base") == 0) - { - return InstructionSet_X86Base; - } - else if (strcmp(className + 3, "Serialize") == 0) - { - return InstructionSet_X86Serialize; - } - } -#else // TARGET_AMD64 - if (className[0] == 'A') - { - if (strcmp(className, "AdvSimd") == 0) - { - return InstructionSet_AdvSimd; - } - if (strcmp(className, "Aes") == 0) - { - return InstructionSet_Aes; - } - if (strcmp(className, "ArmBase") == 0) - { - return InstructionSet_ArmBase; - } - } - else if (className[0] == 'C') - { - if (strcmp(className, "Crc32") == 0) - { - return InstructionSet_Crc32; - } - } - else if (className[0] == 'D') - { - if (strcmp(className, "Dp") == 0) - { - return InstructionSet_Dp; - } - } - else if (className[0] == 'R') - { - if (strcmp(className, "Rdm") == 0) - { - return InstructionSet_Rdm; - } - } - else if (className[0] == 'S') - { - if (strcmp(className, "Sha1") == 0) - { - return InstructionSet_Sha1; - } - if (strcmp(className, "Sha256") == 0) - { - return InstructionSet_Sha256; - } - if (strcmp(className, "Sve2") == 0) - { - return InstructionSet_Sve2; - } - if (strcmp(className, "Sve") 
== 0) - { - return InstructionSet_Sve; - } - } - else if (className[0] == 'V') - { - if (strncmp(className, "Vector64", 8) == 0) - { - return InstructionSet_Vector64; - } - else if (strncmp(className, "Vector128", 9) == 0) - { - return InstructionSet_Vector128; - } - } -#endif // TARGET_XARCH - return InstructionSet_ILLEGAL; -} - -//------------------------------------------------------------------------ -// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name -// -// Arguments: -// className -- The name of the class associated with the InstructionSet to lookup -// innerEnclosingClassName -- The name of the inner enclosing class of X64 classes -// outerEnclosingClassName -- The name of the outer enclosing class of X64 classes -// -// Return Value: -// The InstructionSet associated with className and enclosingClassName -CORINFO_InstructionSet Compiler::lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName) -{ - assert(className != nullptr); - - if (innerEnclosingClassName == nullptr) - { - // No nested class is the most common, so fast path it - return lookupInstructionSet(className); - } - - // Since lookupId is only called for the xplat intrinsics - // or intrinsics in the platform specific namespace, we assume - // that it will be one we can handle and don't try to early out. 
- - CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); - -#if defined(TARGET_XARCH) - if (className[0] == 'V') - { - if (strcmp(className, "V256") == 0) - { - return V256VersionOfIsa(enclosingIsa); - } - else if (strcmp(className, "V512") == 0) - { - return V512VersionOfIsa(enclosingIsa); - } - else if (strcmp(className, "VL") == 0) - { - return VLVersionOfIsa(enclosingIsa); - } - } - else if (strcmp(className, "X64") == 0) - { - return X64VersionOfIsa(enclosingIsa); - } -#else // TARGET_ARM64 - if (strcmp(className, "Arm64") == 0) - { - return Arm64VersionOfIsa(enclosingIsa); - } -#endif // TARGET_XARCH - - return InstructionSet_ILLEGAL; -} -#endif // FEATURE_HW_INTRINSICS - //------------------------------------------------------------------------ // getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types // diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a86dd7e83ed8dc..46071e26be9b2f 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -5,6 +5,163 @@ #include "hwintrinsic.h" #ifdef FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Arm64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The 64-bit only InstructionSet associated with isa +static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AdvSimd: + return InstructionSet_AdvSimd_Arm64; + case InstructionSet_Aes: + return InstructionSet_Aes_Arm64; + case InstructionSet_ArmBase: + return InstructionSet_ArmBase_Arm64; + case InstructionSet_Crc32: + return InstructionSet_Crc32_Arm64; + case InstructionSet_Dp: + return InstructionSet_Dp_Arm64; + case InstructionSet_Sha1: + return InstructionSet_Sha1_Arm64; + case 
InstructionSet_Sha256: + return InstructionSet_Sha256_Arm64; + case InstructionSet_Rdm: + return InstructionSet_Rdm_Arm64; + case InstructionSet_Sve: + return InstructionSet_Sve_Arm64; + case InstructionSet_Sve2: + return InstructionSet_Sve2_Arm64; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// lookupInstructionSet: Gets the InstructionSet for a given class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// +// Return Value: +// The InstructionSet associated with className +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) +{ + assert(className != nullptr); + + if (className[0] == 'A') + { + if (strcmp(className, "AdvSimd") == 0) + { + return InstructionSet_AdvSimd; + } + if (strcmp(className, "Aes") == 0) + { + return InstructionSet_Aes; + } + if (strcmp(className, "ArmBase") == 0) + { + return InstructionSet_ArmBase; + } + } + else if (className[0] == 'C') + { + if (strcmp(className, "Crc32") == 0) + { + return InstructionSet_Crc32; + } + } + else if (className[0] == 'D') + { + if (strcmp(className, "Dp") == 0) + { + return InstructionSet_Dp; + } + } + else if (className[0] == 'R') + { + if (strcmp(className, "Rdm") == 0) + { + return InstructionSet_Rdm; + } + } + else if (className[0] == 'S') + { + if (strcmp(className, "Sha1") == 0) + { + return InstructionSet_Sha1; + } + if (strcmp(className, "Sha256") == 0) + { + return InstructionSet_Sha256; + } + if (strcmp(className, "Sve2") == 0) + { + return InstructionSet_Sve2; + } + if (strcmp(className, "Sve") == 0) + { + return InstructionSet_Sve; + } + } + else if (className[0] == 'V') + { + if (strncmp(className, "Vector64", 8) == 0) + { + return InstructionSet_Vector64; + } + else if (strncmp(className, "Vector128", 9) == 0) + { + return InstructionSet_Vector128; + } + } + + return InstructionSet_ILLEGAL; +} + 
+//------------------------------------------------------------------------ +// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// innerEnclosingClassName -- The name of the inner enclosing class or nullptr if one doesn't exist +// outerEnclosingClassName -- The name of the outer enclosing class or nullptr if one doesn't exist +// +// Return Value: +// The InstructionSet associated with className and enclosingClassName +// +CORINFO_InstructionSet Compiler::lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName) +{ + assert(className != nullptr); + + if (innerEnclosingClassName == nullptr) + { + // No nested class is the most common, so fast path it + return lookupInstructionSet(className); + } + + // Since lookupId is only called for the xplat intrinsics + // or intrinsics in the platform specific namespace, we assume + // that it will be one we can handle and don't try to early out. 
+ + CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); + + if (strcmp(className, "Arm64") == 0) + { + return Arm64VersionOfIsa(enclosingIsa); + } + + return InstructionSet_ILLEGAL; +} + //------------------------------------------------------------------------ // lookupIval: Gets a the implicit immediate value for the given intrinsic // diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index d28c155f6d876e..93daf2374f8c31 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -881,17 +881,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVXVNNI_MultiplyWideningAndAdd: - case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - { - assert(targetReg != REG_NA); - assert(op1Reg != REG_NA); - assert(op2Reg != REG_NA); - - genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); - break; - } - case NI_AVX512_CompressMask: case NI_AVX512_ExpandMask: { @@ -914,6 +903,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + default: { unreached(); diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 7a9a04a7285e95..0babb838f10092 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -5,6 +5,468 @@ #include "hwintrinsic.h" #ifdef FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// 
Return Value: +// The 64-bit only InstructionSet associated with isa +static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_X86Base: + return InstructionSet_X86Base_X64; + case InstructionSet_SSE42: + return InstructionSet_SSE42_X64; + case InstructionSet_AVX: + return InstructionSet_AVX_X64; + case InstructionSet_AVX2: + return InstructionSet_AVX2_X64; + case InstructionSet_AVX512: + return InstructionSet_AVX512_X64; + case InstructionSet_AVX512v2: + return InstructionSet_AVX512v2_X64; + case InstructionSet_AVX512v3: + return InstructionSet_AVX512v3_X64; + case InstructionSet_AVX10v1: + return InstructionSet_AVX10v1_X64; + case InstructionSet_AVX10v2: + return InstructionSet_AVX10v2_X64; + case InstructionSet_AES: + return InstructionSet_AES_X64; + case InstructionSet_AVX512VP2INTERSECT: + return InstructionSet_AVX512VP2INTERSECT_X64; + case InstructionSet_AVXIFMA: + return InstructionSet_AVXIFMA_X64; + case InstructionSet_AVXVNNI: + return InstructionSet_AVXVNNI_X64; + case InstructionSet_AVXVNNIINT: + return InstructionSet_AVXVNNIINT; + case InstructionSet_AVXVNNIINT_V512: + return InstructionSet_AVXVNNIINT_V512; + case InstructionSet_GFNI: + return InstructionSet_GFNI_X64; + case InstructionSet_SHA: + return InstructionSet_SHA_X64; + case InstructionSet_WAITPKG: + return InstructionSet_WAITPKG_X64; + case InstructionSet_X86Serialize: + return InstructionSet_X86Serialize_X64; + default: + return InstructionSet_NONE; + } +} + +//------------------------------------------------------------------------ +// VLVersionOfIsa: Gets the corresponding AVX512VL only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The AVX512VL only InstructionSet associated with isa +static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AVX512: + case InstructionSet_AVX512v2: + case 
InstructionSet_AVX512v3: + case InstructionSet_AVX10v1: + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V256 only InstructionSet associated with isa +static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AES: + { + return InstructionSet_AES_V256; + } + + case InstructionSet_GFNI: + { + return InstructionSet_GFNI_V256; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet +// +// Arguments: +// isa -- The InstructionSet ID +// +// Return Value: +// The V512 only InstructionSet associated with isa +static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_AVX10v1: + case InstructionSet_AVX10v1_X64: + case InstructionSet_AVX10v2: + case InstructionSet_AVX10v2_X64: + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + + case InstructionSet_AES: + { + return InstructionSet_AES_V512; + } + + case InstructionSet_GFNI: + { + return InstructionSet_GFNI_V512; + } + + case InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: + { + return InstructionSet_AVXVNNIINT_V512; + } + + default: + { + return InstructionSet_NONE; + } + } +} + +//------------------------------------------------------------------------ +// lookupInstructionSet: Gets the InstructionSet for a given class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// +// Return 
Value: +// The InstructionSet associated with className +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) +{ + assert(className != nullptr); + + if (className[0] == 'A') + { + if (strcmp(className + 1, "es") == 0) + { + return InstructionSet_AES; + } + else if (strncmp(className + 1, "vx", 2) == 0) + { + if (className[3] == '\0') + { + return InstructionSet_AVX; + } + else if (strncmp(className + 3, "10v", 3) == 0) + { + if (strcmp(className + 6, "1") == 0) + { + return InstructionSet_AVX10v1; + } + else if (strcmp(className + 6, "2") == 0) + { + return InstructionSet_AVX10v2; + } + } + else if (strcmp(className + 3, "2") == 0) + { + return InstructionSet_AVX2; + } + else if (strncmp(className + 3, "512", 3) == 0) + { + if (className[6] == 'B') + { + if (strcmp(className + 7, "italg") == 0) + { + return InstructionSet_AVX512v3; + } + else if (strcmp(className + 7, "f16") == 0) + { + return InstructionSet_AVX10v1; + } + else if (strcmp(className + 7, "W") == 0) + { + return InstructionSet_AVX512; + } + } + else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0)) + { + return InstructionSet_AVX512; + } + else if (className[6] == 'F') + { + if (className[7] == '\0') + { + return InstructionSet_AVX512; + } + else if (strcmp(className + 7, "p16") == 0) + { + return InstructionSet_AVX10v1; + } + } + else if (className[6] == 'V') + { + if (strncmp(className + 7, "bmi", 3) == 0) + { + if (className[10] == '\0') + { + return InstructionSet_AVX512v2; + } + else if (strcmp(className + 10, "2") == 0) + { + return InstructionSet_AVX512v3; + } + } + else if (className[7] == 'p') + { + if (strcmp(className + 8, "p2intersect") == 0) + { + return InstructionSet_AVX512VP2INTERSECT; + } + else if (strcmp(className + 8, "opcntdq") == 0) + { + return InstructionSet_AVX512v3; + } + } + } + } + else if (strcmp(className + 3, "Ifma") == 0) + { + return InstructionSet_AVXIFMA; + } + else if (strncmp(className + 3, "Vnni", 4) == 0) + { + 
if (className[7] == '\0') + { + return InstructionSet_AVXVNNI; + } + else if (strncmp(className + 7, "Int", 3) == 0) + { + if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) + { + if (compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) + { + return InstructionSet_AVXVNNIINT; + } + else + { + return InstructionSet_AVXVNNIINT_V512; + } + } + } + } + } + } + else if (className[0] == 'B') + { + if (strncmp(className + 1, "mi", 2) == 0) + { + if (strcmp(className + 3, "1") == 0) + { + return InstructionSet_AVX2; + } + else if (strcmp(className + 3, "2") == 0) + { + return InstructionSet_AVX2; + } + } + } + else if (className[0] == 'F') + { + if (strcmp(className + 1, "ma") == 0) + { + return InstructionSet_AVX2; + } + else if (strcmp(className + 1, "16c") == 0) + { + return InstructionSet_AVX2; + } + } + else if (className[0] == 'G') + { + if (strcmp(className + 1, "fni") == 0) + { + return InstructionSet_GFNI; + } + } + else if (className[0] == 'L') + { + if (strcmp(className + 1, "zcnt") == 0) + { + return InstructionSet_AVX2; + } + } + else if (className[0] == 'P') + { + if (strcmp(className + 1, "clmulqdq") == 0) + { + return InstructionSet_AES; + } + else if (strcmp(className + 1, "opcnt") == 0) + { + return InstructionSet_SSE42; + } + } + else if (className[0] == 'S') + { + if (strcmp(className + 1, "ha") == 0) + { + return InstructionSet_SHA; + } + else if (strncmp(className + 1, "se", 2) == 0) + { + if ((className[3] == '\0') || (strcmp(className + 3, "2") == 0)) + { + return InstructionSet_X86Base; + } + else if (strcmp(className + 3, "3") == 0) + { + return InstructionSet_SSE42; + } + else if (strcmp(className + 3, "41") == 0) + { + return InstructionSet_SSE42; + } + else if (strcmp(className + 3, "42") == 0) + { + return InstructionSet_SSE42; + } + } + else if (strcmp(className + 1, "sse3") == 0) + { + return InstructionSet_SSE42; + } + } + else if (className[0] == 'V') + { + if (strncmp(className + 1, "ector", 5) == 0) + { 
+ if (strncmp(className + 6, "128", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector128; + } + } + else if (strncmp(className + 6, "256", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector256; + } + } + else if (strncmp(className + 6, "512", 3) == 0) + { + if ((className[9] == '\0') || (strcmp(className + 9, "`1") == 0)) + { + return InstructionSet_Vector512; + } + } + } + else if (strcmp(className + 1, "L") == 0) + { + assert(!"VL.X64 support doesn't exist in the managed libraries and so is not yet implemented"); + return InstructionSet_ILLEGAL; + } + } + else if (strcmp(className, "WaitPkg") == 0) + { + return InstructionSet_WAITPKG; + } + else if (strncmp(className, "X86", 3) == 0) + { + if (strcmp(className + 3, "Base") == 0) + { + return InstructionSet_X86Base; + } + else if (strcmp(className + 3, "Serialize") == 0) + { + return InstructionSet_X86Serialize; + } + } + return InstructionSet_ILLEGAL; +} + +//------------------------------------------------------------------------ +// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name +// +// Arguments: +// className -- The name of the class associated with the InstructionSet to lookup +// innerEnclosingClassName -- The name of the inner enclosing class of X64 classes +// outerEnclosingClassName -- The name of the outer enclosing class of X64 classes +// +// Return Value: +// The InstructionSet associated with className and enclosingClassName +CORINFO_InstructionSet Compiler::lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName) +{ + assert(className != nullptr); + + if (innerEnclosingClassName == nullptr) + { + // No nested class is the most common, so fast path it + return lookupInstructionSet(className); + } + + // Since lookupId is only called for the xplat intrinsics + // or intrinsics in the 
platform specific namespace, we assume + // that it will be one we can handle and don't try to early out. + + CORINFO_InstructionSet enclosingIsa = lookupIsa(innerEnclosingClassName, outerEnclosingClassName, nullptr); + + if (className[0] == 'V') + { + if (strcmp(className, "V256") == 0) + { + return V256VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "V512") == 0) + { + return V512VersionOfIsa(enclosingIsa); + } + else if (strcmp(className, "VL") == 0) + { + return VLVersionOfIsa(enclosingIsa); + } + } + else if (strcmp(className, "X64") == 0) + { + return X64VersionOfIsa(enclosingIsa); + } + + return InstructionSet_ILLEGAL; +} + //------------------------------------------------------------------------ // lookupImmUpperBound: Gets the upper bound for the imm-value of a given NamedIntrinsic // diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index f5c24549205a28..9a38939ade5c34 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -295,7 +295,6 @@ static int Main() bool? ExpectedAvx512Vpopcntdq = null; bool? ExpectedAvx512Bf16 = null; bool? ExpectedAvx512Fp16 = null; - bool? ExpectedAvxVnniInt = null; bool? ExpectedAvx10v1 = null; bool? ExpectedAvx10v1V512 = null; bool? ExpectedAes = null; @@ -315,6 +314,7 @@ static int Main() bool? ExpectedX86Serialize = null; bool? ExpectedAvxVnniIntV512 = false; + bool? ExpectedAvxVnniInt = false; bool? ExpectedAvx10v2 = false; #elif AVX10v2_INTRINSICS bool? 
ExpectedSse3 = true; @@ -430,10 +430,8 @@ static int Main() Check("Lzcnt.X64", ExpectedLzcnt, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); Check("AvxVnniInt", ExpectedAvxVnniInt, &AvxVnniIntIsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - // Check("AvxVnniInt.X64", ExpectedAvxVnniInt, &AvxVnniIntX64IsSupported, AvxVnniInt8.X64.IsSupported, null); Check("AvxVnniIntV512", ExpectedAvxVnniIntV512, &AvxVnniIntV512IsSupported, AvxVnniInt16.V512.IsSupported, () => AvxVnniInt16.V512.MultiplyWideningAndAdd(Vector512.Zero, Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); - // Check("AvxVnniIntV512.X64", ExpectedAvxVnniIntV512, &AvxVnniIntV512X64IsSupported, AvxVnniInt16.V512.X64.IsSupported, null); Check("Avx10v2", ExpectedAvx10v2, &Avx10v2IsSupported, Avx10v2.IsSupported, () => Avx10v2.MinMax(Vector128.Zero, Vector128.Zero, 0x00).Equals(Vector128.Zero)); Check("Avx10v2.X64", ExpectedAvx10v2, &Avx10v2X64IsSupported, Avx10v2.X64.IsSupported, null); @@ -634,9 +632,7 @@ static int Main() static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; static bool AvxVnniIntIsSupported() => AvxVnniInt8.IsSupported; - // static bool AvxVnniInt8X64IsSupported() => AvxVnniInt8.X64.IsSupported; static bool AvxVnniIntV512IsSupported() => AvxVnniInt16.V512.IsSupported; - // static bool AvxVnniInt16X64IsSupported() => AvxVnniInt16.X64.IsSupported; static bool GfniIsSupported() => Gfni.IsSupported; static bool GfniV256IsSupported() => Gfni.V256.IsSupported; From 60fb9eb320dff2a7d1f9f2842f329c674f82f359 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 6 Jul 2025 09:58:35 -0700 Subject: [PATCH 30/32] Ensure we check compSupportsHWIntrinsic so that NAOT works --- src/coreclr/jit/emitxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index f451fbfe944f09..b0a65624163f0e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -303,7 +303,7 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const case INS_vpdpbuuds: { // Vex versions of AvxVnniInt8 + AvxVnniInt16 - return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT); + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT); } case INS_vpmadd52huq: @@ -367,7 +367,7 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const case INS_vpdpbuuds: { // Evex versions of AvxVnniInt8 + AvxVnniInt16 will be supported - return emitComp->compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT_V512); + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT_V512); } case INS_vpdpbusd: From 6d0778fc8e91afde9f996a93b9e2b47082c3869e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 6 Jul 2025 10:38:38 -0700 Subject: [PATCH 31/32] Add the missing instruction latencies --- src/coreclr/jit/instrsxarch.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 04cf0c5f4a6f3b..d395369ffc1b07 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -637,21 +637,21 @@ INST3(vpdpwssds, "vpdpwssds", IUM_RW, BAD_CODE, BAD_CODE, #define LAST_AVXVNNI_INSTRUCTION INS_vpdpwssds #define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpwsud -INST3(vpdpwsud, "vpdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "vpdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "vpdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "vpdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "vpdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "vpdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsud, "vpdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "vpdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second 
source operand and add the results +INST3(vpdpwusd, "vpdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "vpdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "vpdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "vpdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results #define LAST_AVXVNNIINT8_INSTRUCTION INS_vpdpwuuds #define FIRST_AVXVNNIINT16_INSTRUCTION INS_vpdpbssd -INST3(vpdpbssd, "vpdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), ILLEGAL, ILLEGAL, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "vpdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with 
individual bytes of second source operand and add the results -INST3(vpdpbsud, "vpdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "vpdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "vpdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "vpdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssd, "vpdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "vpdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "vpdpbsud", IUM_WR, BAD_CODE, 
BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "vpdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "vpdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "vpdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results #define LAST_AVXVNNIINT16_INSTRUCTION INS_vpdpbuuds #define FIRST_AVXIFMA_INSTRUCTION INS_vpmadd52huq From cbadb1d73854f85b15af8c476139767e15a72d0e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 6 Jul 2025 11:33:41 -0700 Subject: [PATCH 32/32] Fixing the naot test and InstructionSet query --- src/coreclr/jit/hwintrinsicxarch.cpp | 2 +- .../SmokeTests/HardwareIntrinsics/Program.cs | 39 +++++++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index d40057a8102e13..7d4d4abd207b5f 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -273,7 +273,7 @@ CORINFO_InstructionSet 
Compiler::lookupInstructionSet(const char* className) { if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) { - if (compOpportunisticallyDependsOn(InstructionSet_AVXVNNIINT)) + if (compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT)) { return InstructionSet_AVXVNNIINT; } diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 9a38939ade5c34..3083d9f6c583cf 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -64,9 +64,6 @@ static int Main() bool? ExpectedAvx2 = false; bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; - bool? ExpectedAvxVnniInt = false; - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvx10v2 = false; bool? ExpectedF16c = false; bool? ExpectedFma = false; bool? ExpectedLzcnt = false; @@ -82,9 +79,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV256 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV256 = false; @@ -109,9 +109,6 @@ static int Main() bool? ExpectedAvx2 = false; bool? ExpectedBmi1 = false; bool? ExpectedBmi2 = false; - bool? ExpectedAvxVnniInt = false; - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvx10v2 = false; bool? ExpectedF16c = false; bool? ExpectedFma = false; bool? ExpectedLzcnt = false; @@ -127,9 +124,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? 
ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV256 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV256 = false; @@ -175,12 +175,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvx10v2 = false; #elif AVX_INTRINSICS_NO_AVX2 bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -217,15 +217,15 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvxVnniInt = false; - bool? ExpectedAvx10v2 = false; #elif AVX2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -265,12 +265,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvx10v2 = false; #elif AVX512_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -297,11 +297,14 @@ static int Main() bool? ExpectedAvx512Fp16 = null; bool? ExpectedAvx10v1 = null; bool? ExpectedAvx10v1V512 = null; + bool? ExpectedAvx10v2 = null; bool? 
ExpectedAes = null; bool? ExpectedPclmulqdq = null; bool? ExpectedAvx512Vp2intersect = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt = null; + bool? ExpectedAvxVnniIntV512 = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedGfniV512 = null; @@ -312,10 +315,6 @@ static int Main() bool? ExpectedPclmulqdqV512 = null; bool? ExpectedWaitPkg = null; bool? ExpectedX86Serialize = null; - - bool? ExpectedAvxVnniIntV512 = false; - bool? ExpectedAvxVnniInt = false; - bool? ExpectedAvx10v2 = false; #elif AVX10v2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -328,8 +327,6 @@ static int Main() bool? ExpectedBmi2 = true; bool? ExpectedF16c = true; bool? ExpectedFma = true; - bool? ExpectedAvxVnniIntV512 = true; - bool? ExpectedAvx10v2 = true; bool? ExpectedLzcnt = true; bool? ExpectedAvx512F = true; bool? ExpectedAvx512BW = true; @@ -343,12 +340,14 @@ static int Main() bool? ExpectedAvx512Fp16 = true; bool? ExpectedAvx10v1 = true; bool? ExpectedAvx10v1V512 = true; + bool? ExpectedAvx10v2 = true; + bool? ExpectedAvxVnni = true; + bool? ExpectedAvxVnniIntV512 = true; bool? ExpectedAes = null; bool? ExpectedPclmulqdq = null; bool? ExpectedAvx512Vp2intersect = null; bool? ExpectedAvxIfma = null; - bool? ExpectedAvxVnni = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedGfniV512 = null;