From d22af4fc699e3a2e840c708ed7651dbd804c0ab6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 19 Mar 2025 13:15:16 -0700 Subject: [PATCH 001/120] Capture g_sve_length and compVectorTLength --- src/coreclr/jit/compiler.cpp | 5 +++++ src/coreclr/jit/compiler.h | 4 ++++ src/coreclr/vm/codeman.cpp | 1 + src/coreclr/vm/codeman.h | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index f10eb0f71a439f..b7e08a8415f2ef 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2134,6 +2134,10 @@ unsigned ReinterpretHexAsDecimal(unsigned in) return result; } +#ifdef TARGET_ARM64 +unsigned Compiler::compVectorTLength = 0; +#endif + void Compiler::compInitOptions(JitFlags* jitFlags) { opts = {}; @@ -7751,6 +7755,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd, compilerMem = pParam->pAlloc->allocateMemory(sizeof(Compiler)); } + Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself pParam->pComp = new (compilerMem, jitstd::placement_t()) Compiler(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo, pParam->inlineInfo); #if MEASURE_CLRAPI_CALLS diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a1d76be581eae0..d343807c1e8a76 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2598,6 +2598,10 @@ class Compiler EHNodeDsc* ehnTree; // root of the tree comprising the EHnodes. EHNodeDsc* ehnNext; // root of the tree comprising the EHnodes. +#if defined(TARGET_ARM64) + static unsigned compVectorTLength; +#endif + struct EHNodeDsc { enum EHBlockType diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 12b02c90a39ba4..078d01c3925989 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1517,6 +1517,7 @@ void EEJitManager::SetCpuInfo() { uint32_t maxVectorTLength = (maxVectorTBitWidth / 8); uint64_t sveLengthFromOS = GetSveLengthFromOS(); + g_sve_length = sveLengthFromOS; // For now, enable SVE only when the system vector length is 16 bytes (128-bits) // TODO: https://github.com/dotnet/runtime/issues/101477 diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 35c149355fae27..d50addc64d51f5 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -88,6 +88,10 @@ class EECodeInfo; #define ROUND_DOWN_TO_PAGE(x) ( (size_t) (x) & ~((size_t)GetOsPageSize()-1)) #define ROUND_UP_TO_PAGE(x) (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1)) +#ifdef TARGET_ARM64 +extern uint64_t g_sve_length; +#endif + enum StubCodeBlockKind : int { STUB_CODE_BLOCK_UNKNOWN = 0, From 41a1d0591cebb986d03ceb75c0f8e911fdd36e6f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 19 Mar 2025 13:46:12 -0700 Subject: [PATCH 002/120] Add InstructionSet_Vector --- src/coreclr/inc/corinfoinstructionset.h | 3 +++ src/coreclr/jit/compiler.cpp | 4 ++++ src/coreclr/jit/hwintrinsic.cpp | 7 +++++++ src/coreclr/jit/hwintrinsicarm64.cpp | 9 +++++++++ src/coreclr/jit/typelist.h | 3 +++ 5 files changed, 26 insertions(+) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5b021c0b689045..79c22d4767895d 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -41,6 +41,7 @@ enum CORINFO_InstructionSet InstructionSet_Sha1_Arm64=23, InstructionSet_Sha256_Arm64=24, InstructionSet_Sve_Arm64=25, + InstructionSet_Vector=26, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 
InstructionSet_X86Base=1,
@@ -860,6 +861,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
 return "Sha256_Arm64";
 case InstructionSet_Atomics :
 return "Atomics";
+ case InstructionSet_Vector:
+ return "Vector`1";
 case InstructionSet_Vector64 :
 return "Vector64";
 case InstructionSet_Vector128 :
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index b7e08a8415f2ef..087dfec385f3d4 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2047,6 +2047,10 @@ void Compiler::compSetProcessor()
 instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
 instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
 }
+ if (instructionSetFlags.HasInstructionSet(InstructionSet_Sve))
+ {
+ instructionSetFlags.AddInstructionSet(InstructionSet_Vector);
+ }
 #endif // TARGET_ARM64
 assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags)));
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index a00d57962d757b..d28cecec107d10 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -1148,6 +1148,13 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
 return NI_Illegal;
 }
 }
+ else if (isa == InstructionSet_Vector)
+ {
+ if (!comp->IsBaselineSimdIsaSupported())
+ {
+ return NI_Illegal;
+ }
+ }
 #endif
 size_t isaIndex = static_cast<size_t>(isa) - 1;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index be1c577d4bdfaa..5313e4201756f2 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -114,6 +114,14 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
 {
 return InstructionSet_Vector128;
 }
+ else if (strncmp(className, "Vector`1", 8) == 0)
+ {
+ return InstructionSet_Vector;
+ }
+ else if (strncmp(className, "Vector", 6) == 0)
+ {
+ return InstructionSet_Vector;
+ }
 }
 return InstructionSet_ILLEGAL;
@@ -212,6 +220,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
 case InstructionSet_Sha256_Arm64:
 case InstructionSet_Sve:
 case InstructionSet_Sve_Arm64:
+ case InstructionSet_Vector:
 case InstructionSet_Vector64:
 case InstructionSet_Vector128:
 return true;
diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h
index 865c177bc7bc32..7de8c35ecf6fff 100644
--- a/src/coreclr/jit/typelist.h
+++ b/src/coreclr/jit/typelist.h
@@ -60,6 +60,9 @@ DEF_TP(STRUCT ,"struct" , TYP_STRUCT, 0, 0, 0, 1, 4, VTR_INT, available
 DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, 8, 8, 8, 2, 8, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, 12,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
+#if defined(TARGET_ARM64)
+DEF_TP(SIMD ,"simd" , TYP_SIMD , -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
+#endif // TARGET_ARM64
 #if defined(TARGET_XARCH)
 DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
From c7d8ede62a07d824616a6a0b26d64cdca60bde81 Mon Sep 17
00:00:00 2001 From: Kunal Pathak Date: Wed, 19 Mar 2025 13:53:13 -0700 Subject: [PATCH 003/120] Add CORINFO_HFA_ELEM_VECTOR_VL --- src/coreclr/inc/corhdr.h | 1 + .../tools/Common/JitInterface/CorInfoImpl.cs | 2 +- .../tools/Common/JitInterface/CorInfoTypes.cs | 1 + src/coreclr/vm/arm64/profiler.cpp | 2 +- src/coreclr/vm/callingconvention.h | 3 ++ src/coreclr/vm/class.cpp | 31 +++++++++++++++++-- src/coreclr/vm/classlayoutinfo.cpp | 1 + 7 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/coreclr/inc/corhdr.h b/src/coreclr/inc/corhdr.h index 84f7ebcf428b75..83beddf3bf7866 100644 --- a/src/coreclr/inc/corhdr.h +++ b/src/coreclr/inc/corhdr.h @@ -1750,6 +1750,7 @@ typedef enum CorInfoHFAElemType : unsigned { CORINFO_HFA_ELEM_DOUBLE, CORINFO_HFA_ELEM_VECTOR64, CORINFO_HFA_ELEM_VECTOR128, + CORINFO_HFA_ELEM_VECTOR_VL, } CorInfoHFAElemType; // diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index a0a98dc9fae023..ebbb614841f987 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -3299,7 +3299,7 @@ private CorInfoHFAElemType getHFAType(CORINFO_CLASS_STRUCT_* hClass) ValueTypeShapeCharacteristics.Float32Aggregate => CorInfoHFAElemType.CORINFO_HFA_ELEM_FLOAT, ValueTypeShapeCharacteristics.Float64Aggregate => CorInfoHFAElemType.CORINFO_HFA_ELEM_DOUBLE, ValueTypeShapeCharacteristics.Vector64Aggregate => CorInfoHFAElemType.CORINFO_HFA_ELEM_VECTOR64, - ValueTypeShapeCharacteristics.Vector128Aggregate => CorInfoHFAElemType.CORINFO_HFA_ELEM_VECTOR128, + ValueTypeShapeCharacteristics.Vector128Aggregate => CorInfoHFAElemType.CORINFO_HFA_ELEM_VECTOR128, //TODO-VL: Need for VL too? _ => CorInfoHFAElemType.CORINFO_HFA_ELEM_NONE }; } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 08d7df78c946d8..107a8289104add 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -720,6 +720,7 @@ public enum CorInfoHFAElemType CORINFO_HFA_ELEM_DOUBLE, CORINFO_HFA_ELEM_VECTOR64, CORINFO_HFA_ELEM_VECTOR128, + CORINFO_HFA_ELEM_VECTOR_VL, } /* data to optimize delegate construction */ diff --git a/src/coreclr/vm/arm64/profiler.cpp b/src/coreclr/vm/arm64/profiler.cpp index 471677347ea578..04165b22c91d38 100644 --- a/src/coreclr/vm/arm64/profiler.cpp +++ b/src/coreclr/vm/arm64/profiler.cpp @@ -282,7 +282,7 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void) { CorInfoHFAElemType hfaElemType = thReturnValueType.GetHFAType(); - if (hfaElemType == CORINFO_HFA_ELEM_VECTOR128) + if ((hfaElemType == CORINFO_HFA_ELEM_VECTOR128) || (hfaElemType == CORINFO_HFA_ELEM_VECTOR_VL)) { return &pData->floatArgumentRegisters.q[0]; } diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 5b167300ec856d..7117ae67c9a047 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -60,6 +60,9 @@ struct ArgLocDesc case CORINFO_HFA_ELEM_DOUBLE: return 8; case CORINFO_HFA_ELEM_VECTOR64: return 8; case CORINFO_HFA_ELEM_VECTOR128: return 16; +#if defined(TARGET_ARM64) + case CORINFO_HFA_ELEM_VECTOR_VL: return g_sve_length; // TODO-VL: Need to return the cached value +#endif default: _ASSERTE(!"Invalid HFA Type"); return 0; } } diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 6120ee32ea696e..dd8d3ad294af96 100644 --- a/src/coreclr/vm/class.cpp +++ 
b/src/coreclr/vm/class.cpp @@ -1780,7 +1780,19 @@ CorInfoHFAElemType MethodTable::GetHFAType() int vectorSize = pMT->GetVectorSize(); if (vectorSize != 0) { - return (vectorSize == 8) ? CORINFO_HFA_ELEM_VECTOR64 : CORINFO_HFA_ELEM_VECTOR128; + if (vectorSize == 8) + { + return CORINFO_HFA_ELEM_VECTOR64; + } + else if (vectorSize == 16) + { + return CORINFO_HFA_ELEM_VECTOR128; + } + else + { + assert ((vectorSize % 16) == 0); + return CORINFO_HFA_ELEM_VECTOR_VL; + } } PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw(); @@ -1885,7 +1897,19 @@ EEClass::CheckForHFA() int thisElemSize = pMT->GetVectorSize(); if (thisElemSize != 0) { - fieldHFAType = (thisElemSize == 8) ? CORINFO_HFA_ELEM_VECTOR64 : CORINFO_HFA_ELEM_VECTOR128; + if (thisElemSize == 8) + { + fieldHFAType = CORINFO_HFA_ELEM_VECTOR64; + } + else if (thisElemSize == 16) + { + fieldHFAType = CORINFO_HFA_ELEM_VECTOR128; + } + else + { + assert ((thisElemSize % 16) == 0); + fieldHFAType = CORINFO_HFA_ELEM_VECTOR_VL; + } } else #endif // TARGET_ARM64 @@ -1956,6 +1980,9 @@ EEClass::CheckForHFA() case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; + case CORINFO_HFA_ELEM_VECTOR_VL: + elemSize = g_sve_length; //TODO-VL: Need to cache it + break; #endif default: // ELEMENT_TYPE_END diff --git a/src/coreclr/vm/classlayoutinfo.cpp b/src/coreclr/vm/classlayoutinfo.cpp index b7290c5a5c3b5f..1fd060cb38729d 100644 --- a/src/coreclr/vm/classlayoutinfo.cpp +++ b/src/coreclr/vm/classlayoutinfo.cpp @@ -1195,6 +1195,7 @@ CorInfoHFAElemType EEClassNativeLayoutInfo::GetNativeHFATypeRaw() const #ifdef TARGET_ARM64 case CORINFO_HFA_ELEM_VECTOR64: elemSize = 8; break; case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; + case CORINFO_HFA_ELEM_VECTOR_VL: elemSize = g_sve_length; break; //TODO-VL: Need to cache this #endif default: _ASSERTE(!"Invalid HFA Type"); } From 926eb6945102ab8e38ad935c8cc776b5c70399a3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 19 Mar 2025 14:27:16 -0700 Subject: [PATCH 004/120] Update the type of TYP_SIMD --- src/coreclr/jit/compiler.cpp | 13 ++++++++++--- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/compiler.hpp | 4 ++-- src/coreclr/jit/emit.cpp | 4 ++-- src/coreclr/jit/emit.h | 4 ++-- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 087dfec385f3d4..a3040463aca8b3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -104,7 +104,7 @@ inline bool _our_GetThreadCycles(uint64_t* cycleOut) #endif // which host OS -const BYTE genTypeSizes[] = { +BYTE genTypeSizes[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz, #include "typelist.h" #undef DEF_TP @@ -116,7 +116,7 @@ const BYTE genTypeAlignments[] = { #undef DEF_TP }; -const BYTE genTypeStSzs[] = { +BYTE genTypeStSzs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) st, #include "typelist.h" #undef DEF_TP @@ -490,6 +490,14 @@ Compiler::Compiler(ArenaAllocator* arena, info.compHasNextCallRetAddr = false; info.compIsVarArgs = false; + +#if defined(TARGET_ARM64) + Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself + genTypeSizes[TYP_SIMD] = (BYTE)Compiler::compVectorTLength; + emitTypeSizes[TYP_SIMD] = (unsigned short)Compiler::compVectorTLength; + emitTypeActSz[TYP_SIMD] = (unsigned short)Compiler::compVectorTLength; + genTypeStSzs[TYP_SIMD] = (BYTE)Compiler::compVectorTLength / sizeof(int); +#endif // TARGET_ARM64 } 
//------------------------------------------------------------------------
@@ -7759,7 +7767,6 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
 compilerMem = pParam->pAlloc->allocateMemory(sizeof(Compiler));
 }
- Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself
 pParam->pComp = new (compilerMem, jitstd::placement_t())
 Compiler(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo, pParam->inlineInfo);
 #if MEASURE_CLRAPI_CALLS
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index d343807c1e8a76..ce1a9b21fca449 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -12567,9 +12567,9 @@ const instruction INS_BREAKPOINT = INS_ebreak;
 /*****************************************************************************/
-extern const BYTE genTypeSizes[];
+extern BYTE genTypeSizes[];
 extern const BYTE genTypeAlignments[];
-extern const BYTE genTypeStSzs[];
+extern BYTE genTypeStSzs[];
 extern const BYTE genActualTypes[];
 /*****************************************************************************/
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index a1e706fde6083e..13370049e7554a 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -1115,7 +1115,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask, var_typ
 * Return the size in bytes of the given type.
 */
-extern const BYTE genTypeSizes[TYP_COUNT];
+extern BYTE genTypeSizes[TYP_COUNT];
 template <typename T>
 inline unsigned genTypeSize(T value)
@@ -1131,7 +1131,7 @@ inline unsigned genTypeSize(T value)
 * returns 1 for 32-bit types and 2 for 64-bit types.
 */
-extern const BYTE genTypeStSzs[TYP_COUNT];
+extern BYTE genTypeStSzs[TYP_COUNT];
 template <typename T>
 inline unsigned genTypeStSz(T value)
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index 93d515c4f92acf..91e9ae74229c94 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -595,13 +595,13 @@ void emitterStats(FILE* fout)
 /*****************************************************************************/
-const unsigned short emitTypeSizes[] = {
+unsigned short emitTypeSizes[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sze,
 #include "typelist.h"
 #undef DEF_TP
 };
-const unsigned short emitTypeActSz[] = {
+unsigned short emitTypeActSz[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) asze,
 #include "typelist.h"
 #undef DEF_TP
 };
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index ef1fd2f701fc15..1a72001b8d297a 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -3714,7 +3714,7 @@ inline unsigned emitter::emitSpecifiedOffset(unsigned insCount, unsigned igSize)
 return codePos;
 }
-extern const unsigned short emitTypeSizes[TYP_COUNT];
+extern unsigned short emitTypeSizes[TYP_COUNT];
 template <typename T>
 inline emitAttr emitTypeSize(T type)
@@ -3724,7 +3724,7 @@ inline emitAttr emitTypeSize(T type)
 return (emitAttr)emitTypeSizes[TypeGet(type)];
 }
-extern const unsigned short emitTypeActSz[TYP_COUNT];
+extern unsigned short emitTypeActSz[TYP_COUNT];
 template <typename T>
 inline emitAttr emitActualTypeSize(T type)
From 2b39810066aff99edb048c28b5d6b37cf6d0c3b4 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 19 Mar 2025 14:47:21 -0700
Subject: [PATCH 005/120] Passing Vector to args and returns
---
 src/coreclr/jit/abi.cpp | 6 +++++
 src/coreclr/jit/codegencommon.cpp | 12 +++++++--
 src/coreclr/jit/compiler.cpp | 16 ++++++++++--
src/coreclr/jit/compiler.h | 14 ++++++++++- src/coreclr/jit/emitarm64.cpp | 27 ++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 3 +++ src/coreclr/jit/emitarm64sve.cpp | 42 +++++++++++++++++++++++++++++++ 7 files changed, 115 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 0525e18888f46b..c3805589a2b9cf 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -125,6 +125,12 @@ var_types ABIPassingSegment::GetRegisterType() const return TYP_SIMD16; #endif default: +#ifdef TARGET_ARM64 + if (Size == Compiler::compVectorTLength) + { + return TYP_SIMD; + } +#endif assert(!"Unexpected size for floating point register"); return TYP_UNDEF; } diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 174c19e327b7ef..51789a805ff1c5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3580,8 +3580,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); + insOpts opts = INS_OPTS_NONE; +#ifdef TARGET_ARM64 + opts = copyType == TYP_SIMD ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; +#endif GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, - /* canSkip */ false); + /* canSkip */ false, opts); if (node->copiedReg == initReg) { *initRegStillZeroed = false; @@ -3598,8 +3602,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); + insOpts opts = INS_OPTS_NONE; +#ifdef TARGET_ARM64 + opts = edge->type == TYP_SIMD ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; +#endif GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, - /* canSkip */ true); + /* canSkip */ true, opts); break; } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a3040463aca8b3..a0bd665b905a49 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -495,7 +495,7 @@ Compiler::Compiler(ArenaAllocator* arena, Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself genTypeSizes[TYP_SIMD] = (BYTE)Compiler::compVectorTLength; emitTypeSizes[TYP_SIMD] = (unsigned short)Compiler::compVectorTLength; - emitTypeActSz[TYP_SIMD] = (unsigned short)Compiler::compVectorTLength; + emitTypeActSz[TYP_SIMD] = EA_SCALABLE; genTypeStSzs[TYP_SIMD] = (BYTE)Compiler::compVectorTLength / sizeof(int); #endif // TARGET_ARM64 } @@ -683,6 +683,13 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS { return useType; } +#ifdef TARGET_ARM64 + if (structSize == compVectorTLength) + { + var_types hfaType = GetHfaType(clsHnd); + return hfaType == TYP_SIMD ? TYP_SIMD : TYP_UNKNOWN; + } +#endif } // Now deal with non-HFA/HVA structs. @@ -908,7 +915,12 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. 
- if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
+ if (canReturnInRegister && (useType == TYP_UNKNOWN) &&
+ ((structSize <= MAX_PASS_SINGLEREG_BYTES)
+#ifdef TARGET_ARM64
+ || ((GetHfaType(clsHnd) == TYP_SIMD) && (structSize == compVectorTLength)))
+#endif
+ )
 {
 // We set the "primitive" useType based upon the structSize
 // and also examine the clsHnd to see if it is an HFA of count one
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index ce1a9b21fca449..9c1773c53b691b 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -153,6 +153,10 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind)
 case CORINFO_HFA_ELEM_DOUBLE:
 return TYP_DOUBLE;
 #ifdef FEATURE_SIMD
+#ifdef TARGET_ARM64
+ case CORINFO_HFA_ELEM_VECTOR_VL:
+ return TYP_SIMD;
+#endif
 case CORINFO_HFA_ELEM_VECTOR64:
 return TYP_SIMD8;
 case CORINFO_HFA_ELEM_VECTOR128:
@@ -174,6 +178,10 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type)
 case TYP_DOUBLE:
 return CORINFO_HFA_ELEM_DOUBLE;
 #ifdef FEATURE_SIMD
+#ifdef TARGET_ARM64
+ case TYP_SIMD:
+ return CORINFO_HFA_ELEM_VECTOR_VL;
+#endif
 case TYP_SIMD8:
 return CORINFO_HFA_ELEM_VECTOR64;
 case TYP_SIMD16:
@@ -9176,7 +9184,11 @@ class Compiler
 return XMM_REGSIZE_BYTES;
 }
 #elif defined(TARGET_ARM64)
- if (compExactlyDependsOn(InstructionSet_VectorT128))
+ if (compExactlyDependsOn(InstructionSet_Sve_Arm64))
+ {
+ return Compiler::compVectorTLength;
+ }
+ else if (compExactlyDependsOn(InstructionSet_VectorT128))
 {
 return FP_REGSIZE_BYTES;
 }
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index ea9a9b53b2797e..901c0a61d778da 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -2363,6 +2363,33 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
 return code;
 }
+/*static*/ bool emitter::emitIns_valid_imm_for_sve_mov(INT64 imm, emitAttr elemsize)
+{
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ {
+ return (-128 <= imm) && (imm <= 127);
+ }
+ case EA_2BYTE:
+ case EA_4BYTE:
+ case EA_8BYTE:
+ {
+ if ((-32768 <= imm) && (imm <= 32512) && (imm != 0))
+ {
+ return imm % 256 == 0;
+ }
+ break;
+ }
+ default:
+ {
+ unreached();
+ }
+ }
+ return false;
+}
+
 // true if this 'imm' can be encoded as an input operand to a mov instruction
 /*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size)
 {
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index 8e2ed80c6cdf28..afc2852e00e133 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -1029,6 +1029,9 @@ static unsigned insGetRegisterListSize(instruction ins);
 /************************************************************************/
 public:
+// true if this 'imm' can be encoded as an input operand to an SVE mov instruction
+static bool emitIns_valid_imm_for_sve_mov(INT64 imm, emitAttr size);
+
 // true if this 'imm' can be encoded as an input operand to a mov instruction
 static bool emitIns_valid_imm_for_mov(INT64 imm, emitAttr size);
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index 62e767d6320525..88c0d2d1848cad 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -2662,6 +2662,27 @@ void emitter::emitInsSve_R_R_I(instruction ins,
 // iiiiii
 assert(insScalableOptsNone(sopt));
+
+ // Since SVE uses "mul vl", we need to make sure that we calculate
+ // the offset correctly.
+ if (Compiler::compVectorTLength > 16) // TODO-VL: Convert it into helper method
+ {
+ if ((imm % Compiler::compVectorTLength) == 0)
+ {
+ // If imm is a multiple of Compiler::compVectorTLength,
+ // we can use the `[#imm, mul vl]` form directly.
+ imm = imm / Compiler::compVectorTLength;
+ }
+ else
+ {
+ // Otherwise, materialize the address first and then
+ // use it in the ldr:
+ // add reg2, reg2, imm
+ // ldr zn, [reg2]
+ emitIns_R_R_I(INS_add, EA_8BYTE, reg2, reg2, imm);
+ imm = 0;
+ }
+ }
 if (isVectorRegister(reg1))
 {
 fmt = IF_SVE_IE_2A;
@@ -2681,6 +2702,27 @@ void emitter::emitInsSve_R_R_I(instruction ins,
 // iiiiii
 assert(insScalableOptsNone(sopt));
+
+ // Since SVE uses "mul vl", we need to make sure that we calculate
+ // the offset correctly.
+ if (Compiler::compVectorTLength > 16) // TODO-VL: Convert it into helper method
+ {
+ if ((imm % Compiler::compVectorTLength) == 0)
+ {
+ // If imm is a multiple of Compiler::compVectorTLength,
+ // we can use the `[#imm, mul vl]` form directly.
+ imm = imm / Compiler::compVectorTLength;
+ }
+ else
+ {
+ // Otherwise, materialize the address first and then
+ // use it in the str:
+ // add reg2, reg2, imm
+ // str zn, [reg2]
+ emitIns_R_R_I(INS_add, EA_8BYTE, reg2, reg2, imm);
+ imm = 0;
+ }
+ }
 if (isVectorRegister(reg1))
 {
 fmt = IF_SVE_JH_2A;
From cf9ea6090ce27f194eedc562941e6bcf5317e4a9 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 19 Mar 2025 14:49:50 -0700
Subject: [PATCH 006/120] Rename TYP_SIMD -> TYP_SIMDVL
---
 src/coreclr/jit/abi.cpp | 2 +-
 src/coreclr/jit/codegencommon.cpp | 4 ++--
 src/coreclr/jit/compiler.cpp | 12 ++++++------
 src/coreclr/jit/compiler.h | 6 +++---
 src/coreclr/jit/typelist.h | 2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp
index c3805589a2b9cf..15abbd37bfb957 100644
--- a/src/coreclr/jit/abi.cpp
+++ b/src/coreclr/jit/abi.cpp
@@ -128,7 +128,7 @@ var_types ABIPassingSegment::GetRegisterType() const
 #ifdef TARGET_ARM64
 if (Size == Compiler::compVectorTLength)
 {
- return TYP_SIMD;
+ return TYP_SIMDVL;
 }
 #endif
 assert(!"Unexpected size for floating point register");
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index 51789a805ff1c5..254e1ee6b88c64 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -3582,7 +3582,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
 instruction ins = ins_Copy(node->reg, copyType);
 insOpts opts = INS_OPTS_NONE;
 #ifdef TARGET_ARM64
- opts = copyType == TYP_SIMD ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
+ opts = copyType == TYP_SIMDVL ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
 #endif
 GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg,
 /* canSkip */ false, opts);
@@ -3604,7 +3604,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
 instruction ins = ins_Copy(sourceReg, genActualType(edge->type));
 insOpts opts = INS_OPTS_NONE;
 #ifdef TARGET_ARM64
- opts = edge->type == TYP_SIMD ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
+ opts = edge->type == TYP_SIMDVL ?
INS_OPTS_SCALABLE_D : INS_OPTS_NONE; #endif GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, /* canSkip */ true, opts); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a0bd665b905a49..bbdab27aa34de4 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -493,10 +493,10 @@ Compiler::Compiler(ArenaAllocator* arena, #if defined(TARGET_ARM64) Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself - genTypeSizes[TYP_SIMD] = (BYTE)Compiler::compVectorTLength; - emitTypeSizes[TYP_SIMD] = (unsigned short)Compiler::compVectorTLength; - emitTypeActSz[TYP_SIMD] = EA_SCALABLE; - genTypeStSzs[TYP_SIMD] = (BYTE)Compiler::compVectorTLength / sizeof(int); + genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; + emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength; + emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE; + genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int); #endif // TARGET_ARM64 } @@ -687,7 +687,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS if (structSize == compVectorTLength) { var_types hfaType = GetHfaType(clsHnd); - return hfaType == TYP_SIMD ? TYP_SIMD : TYP_UNKNOWN; + return hfaType == TYP_SIMDVL ? TYP_SIMDVL : TYP_UNKNOWN; } #endif } @@ -918,7 +918,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, if (canReturnInRegister && (useType == TYP_UNKNOWN) && ((structSize <= MAX_PASS_SINGLEREG_BYTES) #ifdef TARGET_ARM64 - || ((GetHfaType(clsHnd) == TYP_SIMD) && (structSize == compVectorTLength))) + || ((GetHfaType(clsHnd) == TYP_SIMDVL) && (structSize == compVectorTLength))) #endif ) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9c1773c53b691b..dea2627bad3f72 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -155,7 +155,7 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind) #ifdef FEATURE_SIMD #ifdef TARGET_ARM64 case CORINFO_HFA_ELEM_VECTOR_VL: - return TYP_SIMD; + return TYP_SIMDVL; #endif case CORINFO_HFA_ELEM_VECTOR64: return TYP_SIMD8; @@ -179,7 +179,7 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type) return CORINFO_HFA_ELEM_DOUBLE; #ifdef FEATURE_SIMD #ifdef TARGET_ARM64 - case TYP_SIMD: + case TYP_SIMDVL: return CORINFO_HFA_ELEM_VECTOR_VL; #endif case TYP_SIMD8: @@ -9596,7 +9596,7 @@ class Compiler } private: - // Returns true if the TYP_SIMD locals on stack are aligned at their + // Returns true if the TYP_SIMDVL locals on stack are aligned at their // preferred byte boundary specified by getSIMDTypeAlignment(). 
 //
 // As per the Intel manual, the preferred alignment for AVX vectors is
diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h
index 7de8c35ecf6fff..68d27be35529ee 100644
--- a/src/coreclr/jit/typelist.h
+++ b/src/coreclr/jit/typelist.h
@@ -61,7 +61,7 @@ DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, 8, 8, 8, 2, 8, VTR_FLOAT, available
 DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, 12,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 #if defined(TARGET_ARM64)
-DEF_TP(SIMD ,"simd" , TYP_SIMD , -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
+DEF_TP(SIMDVL ,"simdVL" , TYP_SIMDVL, -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
 #endif // TARGET_ARM64
 #if defined(TARGET_XARCH)
 DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC)
From 21f364b94f600f01ec5f76b3470cfc30e94e1ca1 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 19 Mar 2025 16:00:43 -0700
Subject: [PATCH 007/120] Fix code to save/restore upper registers of VL
---
 src/coreclr/jit/codegenarm64.cpp | 41 +++++++++++++++++++++++++++-----
 src/coreclr/jit/lsra.cpp | 25 ++++++++++++++++---
 src/coreclr/jit/lsrabuild.cpp | 10 +++++---
 3 files changed, 64 insertions(+), 12 deletions(-)
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 87e64d6e0afc35..af867d1d6641d8 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -5414,14 +5414,28 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)
 GenTreeLclVar* lclNode = op1->AsLclVar();
 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
- assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
-
- regNumber tgtReg = node->GetRegNum();
- assert(tgtReg != REG_NA);
+ unsigned varSize = emitTypeSize(varDsc->GetRegisterType(lclNode));
+ assert((varSize == 16) || (varSize == Compiler::compVectorTLength));
 regNumber op1Reg = genConsumeReg(op1);
 assert(op1Reg != REG_NA);
+ regNumber tgtReg = node->GetRegNum();
+#ifdef TARGET_ARM64
+ // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc.
+ if (op1->TypeIs(TYP_SIMDVL))
+ {
+ // Until we have a custom ABI for SVE, we just store the entire contents of the
+ // Z* registers on the stack. Otherwise we would need multiple free registers
+ // to save everything but the lower 8 bytes.
+ assert(tgtReg == REG_NA);
+
+ GetEmitter()->emitIns_S_R(INS_sve_str, EA_SCALABLE, op1Reg, lclNode->GetLclNum(), 0);
+ return;
+ }
+#endif // TARGET_ARM64
+ assert(tgtReg != REG_NA);
+
 GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, tgtReg, op1Reg, 0, 1);
 if ((node->gtFlags & GTF_SPILL) != 0)
@@ -5470,10 +5484,12 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
 GenTreeLclVar* lclNode = op1->AsLclVar();
 LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode);
- assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
+
+ unsigned varSize = emitTypeSize(varDsc->GetRegisterType(lclNode));
+ assert((varSize == 16) || (varSize == Compiler::compVectorTLength));
 regNumber srcReg = node->GetRegNum();
- assert(srcReg != REG_NA);
+ assert((srcReg != REG_NA) || (node->TypeIs(TYP_SIMDVL)));
 regNumber lclVarReg = genConsumeReg(lclNode);
 assert(lclVarReg != REG_NA);
@@ -5485,6 +5501,19 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
 // The localVar must have a stack home.
 assert(varDsc->lvOnFrame);
+#ifdef TARGET_ARM64
+ // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc.
+ if (op1->TypeIs(TYP_SIMDVL))
+ {
+ // Until we have a custom ABI for SVE, we just reload the entire contents of the
+ // Z* registers from the stack. Otherwise we would need multiple free registers
+ // to restore everything but the lower 8 bytes.
+
+ GetEmitter()->emitIns_R_S(INS_sve_ldr, EA_SCALABLE, lclVarReg, varNum, 0);
+ return;
+ }
+#endif // TARGET_ARM64
+
 // We will load this from the upper 8 bytes of this localVar's home.
 int offset = 8;
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 1ffb45cd0f7fdf..5b4d0aef5865b3 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -1603,6 +1603,9 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
 case TYP_SIMD8:
 case TYP_SIMD12:
 case TYP_SIMD16:
+#if defined(TARGET_ARM64)
+ case TYP_SIMDVL:
+#endif // TARGET_ARM64
 #if defined(TARGET_XARCH)
 case TYP_SIMD32:
 case TYP_SIMD64:
@@ -5917,6 +5920,14 @@ void LinearScan::allocateRegisters()
 allocate = false;
 lclVarInterval->isPartiallySpilled = true;
 }
+#elif defined(TARGET_ARM64)
+ else if (lclVarInterval->registerType == TYP_SIMDVL)
+ {
+ // TODO-VL: Need to do this for allocateRegistersMinimal too?
+ allocate = false;
+ lclVarInterval->isPartiallySpilled = true;
+ setIntervalAsSpilled(currentInterval); // Just mark it as spilled at this point.
+ }
 #endif // TARGET_XARCH
 else
 {
@@ -5929,6 +5940,13 @@ void LinearScan::allocateRegisters()
 if (lclVarInterval->isPartiallySpilled)
 {
 lclVarInterval->isPartiallySpilled = false;
+#if defined(TARGET_ARM64)
+ if (lclVarInterval->registerType == TYP_SIMDVL)
+ {
+ // TODO-VL: Need to do this for allocateRegistersMinimal too?
+ allocate = false;
+ }
+#endif // TARGET_ARM64
 }
 else
 {
@@ -7405,8 +7423,9 @@ void LinearScan::insertUpperVectorSave(GenTree* tree,
 // while on x86 we can spill directly to memory.
 regNumber spillReg = refPosition->assignedReg();
 #ifdef TARGET_ARM64
- bool spillToMem = refPosition->spillAfter;
- assert(spillReg != REG_NA);
+ bool isVariableVL = tree->TypeIs(TYP_SIMDVL);
+ bool spillToMem = refPosition->spillAfter || isVariableVL;
+ assert((spillReg != REG_NA) || isVariableVL);
 #else
 bool spillToMem = (spillReg == REG_NA);
 assert(!refPosition->spillAfter);
@@ -7507,7 +7526,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree,
 simdUpperRestore->gtFlags |= GTF_NOREG_AT_USE;
 #else
 simdUpperRestore->gtFlags |= GTF_SPILLED;
- assert(refPosition->assignedReg() != REG_NA);
+ assert((refPosition->assignedReg() != REG_NA) || (restoreLcl->TypeIs(TYP_SIMDVL)));
 restoreReg = refPosition->assignedReg();
 #endif
 }
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 5d4a166dd61fbb..ef10d00939c35d 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -1478,6 +1478,12 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree,
 }
 }
+ bool forceRegOptional = false;
+#ifdef TARGET_XARCH
+ forceRegOptional = true;
+#elif defined(TARGET_ARM64)
+ forceRegOptional = tree->TypeIs(TYP_SIMDVL);
+#endif
 if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars))
 {
 // We assume that the kill set includes at least some callee-trash registers, but
@@ -1519,9 +1525,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree,
 varInterval->isPartiallySpilled = true;
 pos->skipSaveRestore = blockAlwaysReturn;
 pos->liveVarUpperSave = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex);
-#ifdef TARGET_XARCH
- pos->regOptional = true;
-#endif
+ pos->regOptional = forceRegOptional;
 }
 }
 }
From 7a513ed8bb5ed7e106f0586d1779cd464f0ddaac Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 19 Mar 2025 17:00:55 -0700
Subject: [PATCH 008/120] misc changes
---
 src/coreclr/jit/compiler.h | 18 ++++++++++++++++--
 src/coreclr/jit/hwintrinsic.cpp | 2 +-
 src/coreclr/jit/instr.cpp | 26 ++++++++++++++++++++++++++
 src/coreclr/jit/lclvars.cpp | 3 +++
 src/coreclr/jit/scopeinfo.cpp | 10 +++++++---
 src/coreclr/jit/targetarm64.h | 4 ++--
 6 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index dea2627bad3f72..9bb5b791404766 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8223,7 +8223,7 @@ class Compiler
 assert(type != TYP_STRUCT);
 // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
 // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
+ return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMDVL));
 }
 #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
 #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE")
@@ -9237,7 +9237,11 @@ class Compiler
 return XMM_REGSIZE_BYTES;
 }
 #elif defined(TARGET_ARM64)
- if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
+ if (compExactlyDependsOn(InstructionSet_Sve_Arm64))
+ {
+ return Compiler::compVectorTLength;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
 {
 return FP_REGSIZE_BYTES;
 }
@@ -9345,6 +9349,10 @@ class Compiler
 // Return 0 if size is even less than XMM, otherwise - XMM
 return (size >= XMM_REGSIZE_BYTES) ? XMM_REGSIZE_BYTES : 0;
 #elif defined(TARGET_ARM64)
+ if (FP_REGSIZE_BYTES < Compiler::compVectorTLength)
+ {
+ return (size >= Compiler::compVectorTLength) ?
Compiler::compVectorTLength : 0; + } assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES); return (size >= FP_REGSIZE_BYTES) ? FP_REGSIZE_BYTES : 0; #else @@ -9374,6 +9382,12 @@ class Compiler { simdType = TYP_SIMD16; } +#if defined(TARGET_ARM64) + else if (size == compVectorTLength) + { + simdType = TYP_SIMDVL; + } +#endif // TARGET_ARM64 #if defined(TARGET_XARCH) else if (size == 32) { diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index d28cecec107d10..a208f1e5777f6a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2005,7 +2005,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - if ((simdSize != 8) && (simdSize != 16)) + if ((simdSize != 8) && (simdSize != 16) && (simdSize != compVectorTLength)) #elif defined(TARGET_XARCH) if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64)) #endif // TARGET_* diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index fe3da0a63eb904..b855e4925d957f 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -534,6 +534,8 @@ void CodeGen::inst_Mov(var_types dstType, #ifdef TARGET_ARM GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); +#elif defined(TARGET_ARM64) + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, size == EA_SCALABLE ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE); #else GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip); #endif @@ -1911,6 +1913,12 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_sve_ldr; #endif } +#ifdef TARGET_ARM64 + else if (srcType == TYP_SIMDVL) + { + return INS_sve_ldr; + } +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(srcType)); @@ -1999,6 +2007,12 @@ instruction CodeGen::ins_Copy(var_types dstType) return INS_sve_mov; #endif } +#ifdef TARGET_ARM64 + else if (dstType == TYP_SIMDVL) + { + return INS_sve_mov; + } +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); @@ -2120,6 +2134,12 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return INS_sve_mov; #endif } +#ifdef TARGET_ARM64 + else if (dstType == TYP_SIMDVL) + { + return INS_sve_mov; + } +#endif #endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); @@ -2231,6 +2251,12 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false return INS_sve_str; #endif } +#ifdef TARGET_ARM64 + else if (dstType == TYP_SIMDVL) + { + return INS_sve_str; + } +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS assert(varTypeUsesFloatReg(dstType)); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 1e6d365b9d133f..a9c63bc103fb67 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3108,6 +3108,9 @@ void Compiler::lvaSortByRefCount() case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: +#ifdef TARGET_ARM64 + case TYP_SIMDVL: +#endif #ifdef TARGET_XARCH case TYP_SIMD32: case TYP_SIMD64: diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 83482f13919dd2..0ffea14633a6ae 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -292,10 +292,12 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_XARCH) +#if defined(TARGET_ARM64) + case TYP_SIMDVL: +#elif defined(TARGET_XARCH) case TYP_SIMD32: case TYP_SIMD64: -#endif // 
TARGET_XARCH +#endif // TARGET_ARM64 #endif // FEATURE_SIMD #ifdef TARGET_64BIT case TYP_LONG: @@ -432,7 +434,9 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_XARCH) +#if defined(TARGET_ARM64) + case TYP_SIMDVL: +#elif defined(TARGET_XARCH) case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 3e1dec49b4778a..efaa0e8fac6c44 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -26,8 +26,8 @@ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register - #define MAX_PASS_SINGLEREG_BYTES 16 // Maximum size of a struct passed in a single register (16-byte vector). - #define MAX_PASS_MULTIREG_BYTES 64 // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA) + #define MAX_PASS_SINGLEREG_BYTES 16 // Maximum size of a struct passed in a single register (16-byte vector). //TODO-VL: This can be VL now? + #define MAX_PASS_MULTIREG_BYTES 64 // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA) //TODO-VL: This can be VL now? #define MAX_RET_MULTIREG_BYTES 64 // Maximum size of a struct that could be returned in more than one register (Max is an HVA of 4 16-byte vectors) #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 128-bit vectors using an HVA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. 
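
[Editor's note: a sketch, not part of the patch series. PATCH 005 duplicates the same "mul vl" offset scaling twice in emitInsSve_R_R_I and flags it "TODO-VL: Convert it into helper method". The duplicated logic amounts to the helper below; the name tryEncodeSveVlOffset and its standalone form are assumptions of this note, not code from the series.]

#include <cassert>
#include <cstddef>

// SVE ldr/str (vector) address as [Xn, #imm, mul vl], where #imm counts whole
// vector lengths (VL) and is a signed 9-bit immediate, i.e. -256..255.
// Returns true and writes the scaled immediate when the byte offset is an
// encodable multiple of VL; returns false when the caller must materialize
// the address first (add reg, reg, imm; then a zero offset), which is exactly
// what the two inline copies in PATCH 005 fall back to.
static bool tryEncodeSveVlOffset(ptrdiff_t byteOffset, unsigned vectorTLength, ptrdiff_t* scaledImm)
{
    assert(vectorTLength >= 16);

    if ((byteOffset % (ptrdiff_t)vectorTLength) != 0)
    {
        return false;
    }

    *scaledImm = byteOffset / (ptrdiff_t)vectorTLength;

    // The inline copies in PATCH 005 only divide; this range check is an
    // addition based on the SVE LDR/STR (vector) immediate encoding.
    return (*scaledImm >= -256) && (*scaledImm <= 255);
}
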
From b1c983370ff18029d00ca46885b74cf441af0676 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 20 Mar 2025 15:41:10 -0700
Subject: [PATCH 009/120] Bring TYP_SIMD32 and TYP_SIMD64 for Arm64
---
 src/coreclr/jit/assertionprop.cpp | 5 +-
 src/coreclr/jit/compiler.h | 6 +-
 src/coreclr/jit/gentree.cpp | 30 ++++-----
 src/coreclr/jit/gentree.h | 38 +++++------
 src/coreclr/jit/importer.cpp | 7 ++
 src/coreclr/jit/instr.cpp | 4 +-
 src/coreclr/jit/lclvars.cpp | 4 +-
 src/coreclr/jit/lower.cpp | 13 ++--
 src/coreclr/jit/lsra.cpp | 4 +-
 src/coreclr/jit/optcse.cpp | 4 +-
 src/coreclr/jit/promotiondecomposition.cpp | 4 +-
 src/coreclr/jit/scopeinfo.cpp | 8 +-
 src/coreclr/jit/simd.h | 8 +-
 src/coreclr/jit/typelist.h | 4 +-
 src/coreclr/jit/valuenum.cpp | 76 +++++++++++-----------
 src/coreclr/jit/valuenum.h | 24 +++----
 16 files changed, 118 insertions(+), 121 deletions(-)
diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 6c4a631652e838..38131a8c5f2b76 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -3007,8 +3007,7 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G
 conValTree = vecCon;
 break;
 }
-
-#if defined(TARGET_XARCH)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
 case TYP_SIMD32:
 {
 simd32_t value = vnStore->ConstantValue<simd32_t>(vnCns);
@@ -3032,7 +3031,7 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G
 }
 break;
-#endif // TARGET_XARCH
+#endif // TARGET_XARCH || TARGET_ARM64
 #endif // FEATURE_SIMD
 #if defined(FEATURE_MASKED_HW_INTRINSICS)
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 9bb5b791404766..91f3b33bcf95f3 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8223,7 +8223,7 @@ class Compiler
 assert(type != TYP_STRUCT);
 // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
 // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMDVL)); + return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMDVL) || (type == TYP_SIMD32) || (type == TYP_SIMD64)); } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE") @@ -9388,7 +9388,7 @@ class Compiler simdType = TYP_SIMDVL; } #endif // TARGET_ARM64 -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) else if (size == 32) { simdType = TYP_SIMD32; @@ -9397,7 +9397,7 @@ class Compiler { simdType = TYP_SIMD64; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 else { noway_assert(!"Unexpected size for SIMD type"); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ff2c9583ea8af1..9c26f608965480 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -284,7 +284,7 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL); #if defined(FEATURE_SIMD) -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_LARGE); // *** large node #else static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_SMALL); @@ -3185,7 +3185,7 @@ unsigned Compiler::gtHashValue(GenTree* tree) switch (vecCon->TypeGet()) { -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD64: { add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[15]); @@ -3207,7 +3207,7 @@ unsigned Compiler::gtHashValue(GenTree* tree) add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[4]); FALLTHROUGH; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 case TYP_SIMD16: { @@ -8117,10 +8117,10 @@ GenTree* Compiler::gtNewGenericCon(var_types type, uint8_t* cnsVal) case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 { return gtNewVconNode(type, cnsVal); } @@ -8182,10 +8182,10 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern) case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 { GenTreeVecCon* node = gtNewVconNode(type); memset(&node->gtSimdVal, pattern, sizeof(node->gtSimdVal)); @@ -12216,7 +12216,7 @@ void Compiler::gtDispConst(GenTree* tree) break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", vecCon->gtSimdVal.u64[0], @@ -12233,7 +12233,7 @@ void Compiler::gtDispConst(GenTree* tree) break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -18521,7 +18521,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -18537,7 +18537,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types gtSimd64Val = result; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -18583,7 +18583,7 @@ void 
GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_type break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -18599,7 +18599,7 @@ void GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_type gtSimd64Val = result; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -31492,7 +31492,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val); @@ -31504,7 +31504,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6af651b6860882..a040bc558350bb 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6634,12 +6634,10 @@ struct GenTreeVecCon : public GenTree simd8_t gtSimd8Val; simd12_t gtSimd12Val; simd16_t gtSimd16Val; - -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) simd32_t gtSimd32Val; simd64_t gtSimd64Val; -#endif // TARGET_XARCH - +#endif // TARGET_XARCH || TARGET_ARM64 simd_t gtSimdVal; }; @@ -6891,7 +6889,7 @@ struct GenTreeVecCon : public GenTree break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -6907,7 +6905,7 @@ struct GenTreeVecCon : public GenTree gtSimd64Val = result; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -6947,7 +6945,7 @@ struct GenTreeVecCon : public GenTree break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -6963,7 +6961,7 @@ struct GenTreeVecCon : public GenTree gtSimd64Val = result; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7000,7 +6998,7 @@ struct GenTreeVecCon : public GenTree break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -7016,7 +7014,7 @@ struct GenTreeVecCon : public GenTree gtSimd64Val = result; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7044,7 +7042,7 @@ struct GenTreeVecCon : public GenTree return gtSimd16Val.IsAllBitsSet(); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return gtSimd32Val.IsAllBitsSet(); @@ -7055,7 +7053,7 @@ struct GenTreeVecCon : public GenTree return gtSimd64Val.IsAllBitsSet(); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7092,7 +7090,7 @@ struct GenTreeVecCon : public GenTree return left->gtSimd16Val == right->gtSimd16Val; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return left->gtSimd32Val == right->gtSimd32Val; @@ -7103,7 +7101,7 @@ struct GenTreeVecCon : public GenTree return left->gtSimd64Val == right->gtSimd64Val; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7135,7 +7133,7 @@ struct GenTreeVecCon : public GenTree return gtSimd16Val.IsZero(); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || 
defined(TARGET_ARM64) case TYP_SIMD32: { return gtSimd32Val.IsZero(); @@ -7146,7 +7144,7 @@ struct GenTreeVecCon : public GenTree return gtSimd64Val.IsZero(); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7174,7 +7172,7 @@ struct GenTreeVecCon : public GenTree return EvaluateGetElementFloating(simdBaseType, gtSimd16Val, index); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return EvaluateGetElementFloating(simdBaseType, gtSimd32Val, index); @@ -7184,7 +7182,7 @@ struct GenTreeVecCon : public GenTree { return EvaluateGetElementFloating(simdBaseType, gtSimd64Val, index); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7212,7 +7210,7 @@ struct GenTreeVecCon : public GenTree return EvaluateGetElementIntegral(simdBaseType, gtSimd16Val, index); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return EvaluateGetElementIntegral(simdBaseType, gtSimd32Val, index); @@ -7222,7 +7220,7 @@ struct GenTreeVecCon : public GenTree { return EvaluateGetElementIntegral(simdBaseType, gtSimd64Val, index); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 1e503daabb0f33..bd695edbf5d453 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3876,6 +3876,13 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI } else #endif // TARGET_XARCH +#ifdef TARGET_ARM64 + if ((simdType == TYP_SIMD32) || (simdType == TYP_SIMD64)) + { + hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_Sve); + } + else +#endif // TARGET_ARM64 { // SIMD8, SIMD12, SIMD16 are covered by IsBaselineSimdIsaSupported check assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16)); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index b855e4925d957f..7ef72afd937b5d 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1010,7 +1010,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) return OperandDesc(emit->emitSimd16Const(constValue)); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t constValue; @@ -1025,7 +1025,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) return OperandDesc(emit->emitSimd64Const(constValue)); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index a9c63bc103fb67..4186b73085f1e5 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3111,10 +3111,10 @@ void Compiler::lvaSortByRefCount() #ifdef TARGET_ARM64 case TYP_SIMDVL: #endif -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #ifdef FEATURE_MASKED_HW_INTRINSICS case TYP_MASK: #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 965493f39f346a..27428ef3dab357 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2405,7 +2405,7 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) loadWidth = 16; loadType = TYP_SIMD16; } -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) else if ((loadWidth == 32) || (MaxUnrollSize == 64)) { loadWidth 
= 32; @@ -2416,7 +2416,7 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) loadWidth = 64; loadType = TYP_SIMD64; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #endif // FEATURE_SIMD else { @@ -9753,7 +9753,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) } return; -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) case TYP_SIMD16: if (comp->getPreferredVectorByteLength() >= 32) { @@ -9771,12 +9771,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) } tryReusingPrevValue = true; break; -#elif defined(TARGET_ARM64) // TARGET_AMD64 - case TYP_SIMD16: - tryReusingPrevValue = true; - break; - -#endif // TARGET_ARM64 +#endif // TARGET_AMD64 || TARGET_ARM64 #endif // FEATURE_HW_INTRINSICS #endif // TARGET_64BIT diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5b4d0aef5865b3..6a382c20dc5436 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1606,10 +1606,10 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #if defined(TARGET_ARM64) case TYP_SIMDVL: #endif // TARGET_ARM64 -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #ifdef FEATURE_MASKED_HW_INTRINSICS case TYP_MASK: #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 5e298345fc4505..9324e6810f9cc6 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -4533,7 +4533,7 @@ bool CSE_Heuristic::PromotionCheck(CSE_Candidate* candidate) // int spillSimdRegInProlog = 1; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // If we have a SIMD32/64 that is live across a call we have even higher spill costs // if (candidate->Expr()->TypeIs(TYP_SIMD32, TYP_SIMD64)) @@ -4548,7 +4548,7 @@ bool CSE_Heuristic::PromotionCheck(CSE_Candidate* candidate) // cse_use_cost += 2; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 extra_yes_cost = (BB_UNITY_WEIGHT_UNSIGNED * spillSimdRegInProlog) * 3; } diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 169de3bcceb970..a829b1bc114e7a 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -346,7 +346,7 @@ class DecompositionPlan primitiveType = TYP_SIMD16; } break; -#ifdef TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case 32: if (m_compiler->getPreferredVectorByteLength() >= 32) { @@ -360,7 +360,7 @@ class DecompositionPlan primitiveType = TYP_SIMD64; } break; -#endif +#endif // TARGET_XARCH || TARGET_ARM64 #endif } } diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 0ffea14633a6ae..40c1133ee816c0 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -294,10 +294,10 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_SIMD16: #if defined(TARGET_ARM64) case TYP_SIMDVL: -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: -#endif // TARGET_ARM64 +#endif // TARGET_XARCH || TARGET_ARM64 #endif // FEATURE_SIMD #ifdef TARGET_64BIT case TYP_LONG: @@ -436,10 +436,10 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( case TYP_SIMD16: #if defined(TARGET_ARM64) case TYP_SIMDVL: -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: 
case TYP_SIMD64: -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 2f7610b7e6147c..0472047f971c0f 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -192,7 +192,7 @@ struct simd16_t }; static_assert_no_msg(sizeof(simd16_t) == 16); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) struct simd32_t { union @@ -303,7 +303,7 @@ struct simd64_t } }; static_assert_no_msg(sizeof(simd64_t) == 64); -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) struct simdmask_t @@ -357,10 +357,8 @@ struct simdmask_t static_assert_no_msg(sizeof(simdmask_t) == 8); #endif // FEATURE_MASKED_HW_INTRINSICS -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) typedef simd64_t simd_t; -#else -typedef simd16_t simd_t; #endif inline bool IsUnaryBitwiseOperation(genTreeOps oper) diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 68d27be35529ee..9f25a552202147 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -63,10 +63,10 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available #if defined(TARGET_ARM64) DEF_TP(SIMDVL ,"simdVL" , TYP_SIMDVL, -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) #endif // TARGET_ARM64 -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 9e223650129306..5af515a1d81ec4 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1706,7 +1706,7 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { m_defs = new (alloc) Alloc::Type[ChunkSize]; @@ -1718,7 +1718,7 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type m_defs = new (alloc) Alloc::Type[ChunkSize]; break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: @@ -1883,7 +1883,7 @@ ValueNum ValueNumStore::VNForSimd16Con(const simd16_t& cnsVal) return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) ValueNum ValueNumStore::VNForSimd32Con(const simd32_t& cnsVal) { return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32); @@ -1893,7 +1893,7 @@ ValueNum ValueNumStore::VNForSimd64Con(const simd64_t& cnsVal) { return VnForConst(cnsVal, GetSimd64CnsMap(), TYP_SIMD64); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& 
cnsVal) @@ -1990,7 +1990,7 @@ ValueNum ValueNumStore::VNForGenericCon(var_types typ, uint8_t* cnsVal) READ_VALUE(simd16_t); return VNForSimd16Con(val); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { READ_VALUE(simd32_t); @@ -2001,7 +2001,7 @@ ValueNum ValueNumStore::VNForGenericCon(var_types typ, uint8_t* cnsVal) READ_VALUE(simd64_t); return VNForSimd64Con(val); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: @@ -2112,7 +2112,7 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) return VNForSimd16Con(simd16_t::Zero()); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return VNForSimd32Con(simd32_t::Zero()); @@ -2122,7 +2122,7 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) { return VNForSimd64Con(simd64_t::Zero()); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: @@ -2209,7 +2209,7 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ) return VNForSimd16Con(simd16_t::AllBitsSet()); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return VNForSimd32Con(simd32_t::AllBitsSet()); @@ -2219,7 +2219,7 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ) { return VNForSimd64Con(simd64_t::AllBitsSet()); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: @@ -2326,7 +2326,7 @@ ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types sim return VNForSimd16Con(result); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); @@ -2339,7 +2339,7 @@ ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types sim return VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -2392,7 +2392,7 @@ bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, Va break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t tmp = GetConstantSimd32(valVN); @@ -2406,7 +2406,7 @@ bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, Va memcpy(&vector, &tmp, genTypeSize(simdType)); break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -2458,7 +2458,7 @@ bool ValueNumStore::VNIsVectorNegativeZero(var_types simdType, var_types simdBas break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t tmp = GetConstantSimd32(valVN); @@ -2472,7 +2472,7 @@ bool ValueNumStore::VNIsVectorNegativeZero(var_types simdType, var_types simdBas memcpy(&vector, &tmp, genTypeSize(simdType)); break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -3983,7 +3983,7 @@ simd16_t ValueNumStore::GetConstantSimd16(ValueNum argVN) return ConstantValue(argVN); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // Given a simd32 constant value number return its value as a simd32. 
// simd32_t ValueNumStore::GetConstantSimd32(ValueNum argVN) @@ -4003,7 +4003,7 @@ simd64_t ValueNumStore::GetConstantSimd64(ValueNum argVN) return ConstantValue(argVN); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) // Given a simdmask constant value number return its value as a simdmask. @@ -7407,7 +7407,7 @@ simd16_t GetConstantSimd16(ValueNumStore* vns, var_types baseType, ValueNum argV return BroadcastConstantToSimd(vns, baseType, argVN); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) simd32_t GetConstantSimd32(ValueNumStore* vns, var_types baseType, ValueNum argVN) { assert(vns->IsVNConstant(argVN)); @@ -7431,7 +7431,7 @@ simd64_t GetConstantSimd64(ValueNumStore* vns, var_types baseType, ValueNum argV return BroadcastConstantToSimd(vns, baseType, argVN); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 ValueNum EvaluateUnarySimd( ValueNumStore* vns, genTreeOps oper, bool scalar, var_types simdType, var_types baseType, ValueNum arg0VN) @@ -7465,7 +7465,7 @@ ValueNum EvaluateUnarySimd( return vns->VNForSimd16Con(result); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); @@ -7483,7 +7483,7 @@ ValueNum EvaluateUnarySimd( EvaluateUnarySimd(oper, scalar, baseType, &result, arg0); return vns->VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7532,7 +7532,7 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, return vns->VNForSimd16Con(result); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); @@ -7552,7 +7552,7 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, EvaluateBinarySimd(oper, scalar, baseType, &result, arg0, arg1); return vns->VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7657,7 +7657,7 @@ ValueNum EvaluateSimdGetElement( return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd16(arg0VN), arg1); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd32(arg0VN), arg1); @@ -7667,7 +7667,7 @@ ValueNum EvaluateSimdGetElement( { return EvaluateSimdGetElement(vns, baseType, vns->GetConstantSimd64(arg0VN), arg1); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7703,7 +7703,7 @@ ValueNum EvaluateSimdCvtMaskToVector(ValueNumStore* vns, var_types simdType, var return vns->VNForSimd16Con(result); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -7717,7 +7717,7 @@ ValueNum EvaluateSimdCvtMaskToVector(ValueNumStore* vns, var_types simdType, var EvaluateSimdCvtMaskToVector(baseType, &result, arg0); return vns->VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -7753,7 +7753,7 @@ ValueNum EvaluateSimdCvtVectorToMask(ValueNumStore* vns, var_types simdType, var break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); @@ -7767,7 +7767,7 @@ ValueNum EvaluateSimdCvtVectorToMask(ValueNumStore* vns, var_types simdType, var EvaluateSimdCvtVectorToMask(baseType, &result, arg0); break; } 
-#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -8936,7 +8936,7 @@ ValueNum EvaluateSimdWithElementFloating( return vns->VNForSimd16Con(result); } -#if defined TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -8950,7 +8950,7 @@ ValueNum EvaluateSimdWithElementFloating( EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); return vns->VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -8990,7 +8990,7 @@ ValueNum EvaluateSimdWithElementIntegral( return vns->VNForSimd16Con(result); } -#if defined TARGET_XARCH +#if defined TARGET_XARCH || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t result = {}; @@ -9004,7 +9004,7 @@ ValueNum EvaluateSimdWithElementIntegral( EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); return vns->VNForSimd64Con(result); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 default: { @@ -9977,7 +9977,7 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t cnsVal = GetConstantSimd32(vn); @@ -9995,7 +9995,7 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) cnsVal.u64[6], cnsVal.u64[7]); break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: @@ -11571,7 +11571,7 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) break; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: { simd32_t simd32Val; @@ -11589,7 +11589,7 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) tree->gtVNPair.SetBoth(vnStore->VNForSimd64Con(simd64Val)); break; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index a643789a2843ee..0b5ca42e36518b 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -382,10 +382,10 @@ class ValueNumStore simd8_t GetConstantSimd8(ValueNum argVN); simd12_t GetConstantSimd12(ValueNum argVN); simd16_t GetConstantSimd16(ValueNum argVN); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) simd32_t GetConstantSimd32(ValueNum argVN); simd64_t GetConstantSimd64(ValueNum argVN); -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) simdmask_t GetConstantSimdMask(ValueNum argVN); #endif // FEATURE_MASKED_HW_INTRINSICS @@ -468,10 +468,10 @@ class ValueNumStore ValueNum VNForSimd8Con(const simd8_t& cnsVal); ValueNum VNForSimd12Con(const simd12_t& cnsVal); ValueNum VNForSimd16Con(const simd16_t& cnsVal); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) ValueNum VNForSimd32Con(const simd32_t& cnsVal); ValueNum VNForSimd64Con(const simd64_t& cnsVal); -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum VNForSimdMaskCon(const simdmask_t& cnsVal); #endif // FEATURE_MASKED_HW_INTRINSICS @@ -1864,7 +1864,7 @@ class ValueNumStore return m_simd16CnsMap; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { static bool Equals(const simd32_t& x, const simd32_t& y) 
@@ -1942,7 +1942,7 @@ class ValueNumStore } return m_simd64CnsMap; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals @@ -2142,7 +2142,7 @@ struct ValueNumStore::VarTypConv typedef simd16_t Type; typedef simd16_t Lang; }; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) template <> struct ValueNumStore::VarTypConv { @@ -2156,7 +2156,7 @@ struct ValueNumStore::VarTypConv typedef simd64_t Type; typedef simd64_t Lang; }; -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) template <> @@ -2227,7 +2227,7 @@ FORCEINLINE simd16_t ValueNumStore::SafeGetConstantValue(Chunk* c, uns return reinterpret_cast::Lang*>(c->m_defs)[offset]; } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) template <> FORCEINLINE simd32_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) { @@ -2241,7 +2241,7 @@ FORCEINLINE simd64_t ValueNumStore::SafeGetConstantValue(Chunk* c, uns assert(c->m_typ == TYP_SIMD64); return reinterpret_cast::Lang*>(c->m_defs)[offset]; } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) template <> @@ -2294,7 +2294,7 @@ FORCEINLINE simd16_t ValueNumStore::ConstantValueInternal(ValueNum vn return SafeGetConstantValue(c, offset); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) template <> FORCEINLINE simd32_t ValueNumStore::ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce)) { @@ -2322,7 +2322,7 @@ FORCEINLINE simd64_t ValueNumStore::ConstantValueInternal(ValueNum vn return SafeGetConstantValue(c, offset); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) template <> From 4f92c239c6c490bfd4ee852f67bba6dfd079bd20 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 20 Mar 2025 17:52:54 -0700 Subject: [PATCH 010/120] Eliminate TYP_SIMDVL --- src/coreclr/inc/corhdr.h | 3 ++- src/coreclr/jit/abi.cpp | 16 +++++++++------- src/coreclr/jit/codegenarm64.cpp | 6 +++--- src/coreclr/jit/codegencommon.cpp | 4 ++-- src/coreclr/jit/compiler.cpp | 12 ++++++------ src/coreclr/jit/compiler.h | 28 +++++++++++++--------------- src/coreclr/jit/instr.cpp | 8 ++++---- src/coreclr/jit/lclvars.cpp | 3 --- src/coreclr/jit/lsra.cpp | 11 ++++------- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/scopeinfo.cpp | 8 ++------ src/coreclr/jit/typelist.h | 2 +- src/coreclr/jit/vartype.h | 13 +++++++++++++ 13 files changed, 60 insertions(+), 56 deletions(-) diff --git a/src/coreclr/inc/corhdr.h b/src/coreclr/inc/corhdr.h index 83beddf3bf7866..8322ee281bfb73 100644 --- a/src/coreclr/inc/corhdr.h +++ b/src/coreclr/inc/corhdr.h @@ -1750,7 +1750,8 @@ typedef enum CorInfoHFAElemType : unsigned { CORINFO_HFA_ELEM_DOUBLE, CORINFO_HFA_ELEM_VECTOR64, CORINFO_HFA_ELEM_VECTOR128, - CORINFO_HFA_ELEM_VECTOR_VL, + CORINFO_HFA_ELEM_VECTOR256, + CORINFO_HFA_ELEM_VECTOR512, } CorInfoHFAElemType; // diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 15abbd37bfb957..d5e393959f6234 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -123,14 +123,16 @@ var_types ABIPassingSegment::GetRegisterType() const #ifdef FEATURE_SIMD case 16: return TYP_SIMD16; -#endif - default: #ifdef TARGET_ARM64 - if (Size == Compiler::compVectorTLength) - { - return TYP_SIMDVL; - } -#endif + case 32: + assert(Size == 
Compiler::compVectorTLength); + return TYP_SIMD32; + case 64: + assert(Size == Compiler::compVectorTLength); + return TYP_SIMD64; +#endif // TARGET_ARM64 +#endif // FEATURE_SIMD + default: assert(!"Unexpected size for floating point register"); return TYP_UNDEF; } diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index af867d1d6641d8..290ea8d74c41f8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5423,7 +5423,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node) regNumber tgtReg = node->GetRegNum(); #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. - if (op1->TypeIs(TYP_SIMDVL)) + if (varTypeIsSIMDVL(op1->TypeGet())) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the @@ -5489,7 +5489,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) assert((varSize == 16) || (varSize == Compiler::compVectorTLength)); regNumber srcReg = node->GetRegNum(); - assert((srcReg != REG_NA) || (node->TypeIs(TYP_SIMDVL))); + assert((srcReg != REG_NA) || (varTypeIsSIMDVL(node->TypeGet()))); regNumber lclVarReg = genConsumeReg(lclNode); assert(lclVarReg != REG_NA); @@ -5503,7 +5503,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. - if (TypeGet(op1) == TYP_SIMDVL) + if (varTypeIsSIMDVL(op1->TypeGet())) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 254e1ee6b88c64..0c57016532aa57 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3582,7 +3582,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) instruction ins = ins_Copy(node->reg, copyType); insOpts opts = INS_OPTS_NONE; #ifdef TARGET_ARM64 - opts = copyType == TYP_SIMDVL ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; + opts = varTypeIsSIMDVL(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; #endif GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false, opts); @@ -3604,7 +3604,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); insOpts opts = INS_OPTS_NONE; #ifdef TARGET_ARM64 - opts = edge->type == TYP_SIMDVL ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; + opts = varTypeIsSIMDVL(edge->type) ? 
INS_OPTS_SCALABLE_D : INS_OPTS_NONE; #endif GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, /* canSkip */ true, opts); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index bbdab27aa34de4..25b8686f1fbded 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -493,10 +493,10 @@ Compiler::Compiler(ArenaAllocator* arena, #if defined(TARGET_ARM64) Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself - genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; - emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength; - emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE; - genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int); + //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; + //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength; + //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE; + //genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int); #endif // TARGET_ARM64 } @@ -687,7 +687,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS if (structSize == compVectorTLength) { var_types hfaType = GetHfaType(clsHnd); - return hfaType == TYP_SIMDVL ? TYP_SIMDVL : TYP_UNKNOWN; + return varTypeIsSIMDVL(hfaType) ? hfaType : TYP_UNKNOWN; } #endif } @@ -918,7 +918,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, if (canReturnInRegister && (useType == TYP_UNKNOWN) && ((structSize <= MAX_PASS_SINGLEREG_BYTES) #ifdef TARGET_ARM64 - || ((GetHfaType(clsHnd) == TYP_SIMDVL) && (structSize == compVectorTLength))) + || ((varTypeIsSIMDVL(GetHfaType(clsHnd))) && (structSize == compVectorTLength))) #endif ) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 91f3b33bcf95f3..186235f1565395 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -153,14 +153,16 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind) case CORINFO_HFA_ELEM_DOUBLE: return TYP_DOUBLE; #ifdef FEATURE_SIMD -#ifdef TARGET_ARM64 - case CORINFO_HFA_ELEM_VECTOR_VL: - return TYP_SIMDVL; -#endif case CORINFO_HFA_ELEM_VECTOR64: return TYP_SIMD8; case CORINFO_HFA_ELEM_VECTOR128: return TYP_SIMD16; +#ifdef TARGET_ARM64 + case CORINFO_HFA_ELEM_VECTOR256: + return TYP_SIMD32; + case CORINFO_HFA_ELEM_VECTOR512: + return TYP_SIMD64; +#endif // TARGET_ARM64 #endif case CORINFO_HFA_ELEM_NONE: return TYP_UNDEF; @@ -178,14 +180,16 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type) case TYP_DOUBLE: return CORINFO_HFA_ELEM_DOUBLE; #ifdef FEATURE_SIMD -#ifdef TARGET_ARM64 - case TYP_SIMDVL: - return CORINFO_HFA_ELEM_VECTOR_VL; -#endif case TYP_SIMD8: return CORINFO_HFA_ELEM_VECTOR64; case TYP_SIMD16: return CORINFO_HFA_ELEM_VECTOR128; +#ifdef TARGET_ARM64 + case TYP_SIMD32: + return CORINFO_HFA_ELEM_VECTOR256; + case TYP_SIMD64: + return CORINFO_HFA_ELEM_VECTOR512; +#endif #endif case TYP_UNDEF: return CORINFO_HFA_ELEM_NONE; @@ -8223,7 +8227,7 @@ class Compiler assert(type != TYP_STRUCT); // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. 
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMDVL) || (type == TYP_SIMD32) || (type == TYP_SIMD64)); + return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMD32) || (type == TYP_SIMD64)); } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE") @@ -9382,12 +9386,6 @@ class Compiler { simdType = TYP_SIMD16; } -#if defined(TARGET_ARM64) - else if (size == compVectorTLength) - { - simdType = TYP_SIMDVL; - } -#endif // TARGET_ARM64 #if defined(TARGET_XARCH) || defined(TARGET_ARM64) else if (size == 32) { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 7ef72afd937b5d..26d60f0b142657 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1914,7 +1914,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #endif } #ifdef TARGET_ARM64 - else if (srcType == TYP_SIMDVL) + else if (varTypeIsSIMDVL(srcType)) { return INS_sve_ldr; } @@ -2008,7 +2008,7 @@ instruction CodeGen::ins_Copy(var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (dstType == TYP_SIMDVL) + else if (varTypeIsSIMDVL(dstType)) { return INS_sve_mov; } @@ -2135,7 +2135,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (dstType == TYP_SIMDVL) + else if (varTypeIsSIMDVL(dstType)) { return INS_sve_mov; } @@ -2252,7 +2252,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #endif } #ifdef TARGET_ARM64 - else if (dstType == TYP_SIMDVL) + else if ((dstType == TYP_SIMD32) || (dstType == TYP_SIMD64)) { return INS_sve_str; } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4186b73085f1e5..7d221a67d0e7ca 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3108,9 +3108,6 @@ void Compiler::lvaSortByRefCount() case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#ifdef TARGET_ARM64 - case TYP_SIMDVL: -#endif #if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6a382c20dc5436..36362d6516a544 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1603,9 +1603,6 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_ARM64) - case TYP_SIMDVL: -#endif // TARGET_ARM64 #if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: @@ -5921,7 +5918,7 @@ void LinearScan::allocateRegisters() lclVarInterval->isPartiallySpilled = true; } #elif defined(TARGET_ARM64) - else if (lclVarInterval->registerType == TYP_SIMDVL) + else if (varTypeIsSIMDVL(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -5941,7 +5938,7 @@ void LinearScan::allocateRegisters() { lclVarInterval->isPartiallySpilled = false; #if defined(TARGET_ARM64) - if (lclVarInterval->registerType == TYP_SIMDVL) + if (varTypeIsSIMDVL(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -7423,7 +7420,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, // while on x86 we can spill directly to memory. 
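// (For variable-length vectors the ARM64 path below always spills through memory: spillToMem is forced by isVariableVL, which is also why a REG_NA assigned register is tolerated by the assert.)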
regNumber spillReg = refPosition->assignedReg(); #ifdef TARGET_ARM64 - bool isVariableVL = tree->TypeIs(TYP_SIMDVL); + bool isVariableVL = varTypeIsSIMDVL(tree->TypeGet()); bool spillToMem = refPosition->spillAfter || isVariableVL; assert((spillReg != REG_NA) || isVariableVL); #else @@ -7526,7 +7523,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, simdUpperRestore->gtFlags |= GTF_NOREG_AT_USE; #else simdUpperRestore->gtFlags |= GTF_SPILLED; - assert((refPosition->assignedReg() != REG_NA) || (restoreLcl->TypeIs(TYP_SIMDVL))); + assert((refPosition->assignedReg() != REG_NA) || (varTypeIsSIMDVL(restoreLcl->TypeGet()))); restoreReg = refPosition->assignedReg(); #endif } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index ef10d00939c35d..41ee58117268e5 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1482,7 +1482,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, #ifdef TARGET_XARCH forceRegOptional = true; #elif TARGET_ARM64 - forceRegOptional = tree->TypeIs(TYP_SIMDVL); + forceRegOptional = varTypeIsSIMDVL(tree->TypeGet()); #endif if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars)) { diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 40c1133ee816c0..c27684060d4e0a 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -292,9 +292,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_ARM64) - case TYP_SIMDVL: -#elif defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH || TARGET_ARM64 @@ -434,9 +432,7 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: -#if defined(TARGET_ARM64) - case TYP_SIMDVL: -#elif defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) case TYP_SIMD32: case TYP_SIMD64: #endif // TARGET_XARCH || TARGET_ARM64 diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 9f25a552202147..2f1f22fcf29521 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -61,7 +61,7 @@ DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, 8, 8, 8, 2, 8, VTR_FLOAT, available DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, 12,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) #if defined(TARGET_ARM64) -DEF_TP(SIMDVL ,"simdVL" , TYP_SIMDVL, -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) +//DEF_TP(SIMDVL ,"simdVL" , TYP_SIMDVL, -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) #endif // TARGET_ARM64 #if defined(TARGET_XARCH) || defined(TARGET_ARM64) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index c0cfa87775dab6..2f1ae6fe395d93 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -71,6 +71,19 @@ inline var_types TypeGet(var_types v) return v; } +#ifdef TARGET_ARM64 +inline bool varTypeIsSIMDVL(var_types vt) +{ +#ifdef FEATURE_SIMD + return (vt 
== TYP_SIMD32) || (vt == TYP_SIMD64); +#else + // Always return false if FEATURE_SIMD is not enabled + return false; +#endif +} +#endif // TARGET_ARM64 + + template inline bool varTypeIsSIMD(T vt) { From 6e63a3c55be2eee1ff84af35fb46b414e66b43be Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 21 Mar 2025 12:49:41 -0700 Subject: [PATCH 011/120] basic scneario of calling args/returning args --- src/coreclr/jit/codegencommon.cpp | 18 ++++++--- src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/hwintrinsic.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 18 +++++++++ src/coreclr/jit/importercalls.cpp | 45 +++++++++++++---------- src/coreclr/jit/lsra.cpp | 7 ++++ src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/simd.h | 2 + src/coreclr/vm/callingconvention.h | 3 +- src/coreclr/vm/class.cpp | 33 ++++++++++++++--- 10 files changed, 98 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 0c57016532aa57..07a3c3857f0739 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3580,12 +3580,15 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) busyRegs |= genRegMask(node->copiedReg); instruction ins = ins_Copy(node->reg, copyType); - insOpts opts = INS_OPTS_NONE; #ifdef TARGET_ARM64 - opts = varTypeIsSIMDVL(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; -#endif + insOpts opts = varTypeIsSIMDVL(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false, opts); +#else + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, + /* canSkip */ false); +#endif + if (node->copiedReg == initReg) { *initRegStillZeroed = false; @@ -3602,12 +3605,15 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); - insOpts opts = INS_OPTS_NONE; #ifdef TARGET_ARM64 - opts = varTypeIsSIMDVL(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; -#endif + insOpts opts = varTypeIsSIMDVL(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, /* canSkip */ true, opts); +#else + GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, + /* canSkip */ true); +#endif + break; } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 25b8686f1fbded..48ca8786dc2239 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -916,9 +916,9 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. 
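// (The SVE clause below adds one exception: an HFA whose element type is a variable-length vector and whose total size equals compVectorTLength can still be returned in a single scalable register.)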
if (canReturnInRegister && (useType == TYP_UNKNOWN) && - ((structSize <= MAX_PASS_SINGLEREG_BYTES) + (structSize <= MAX_PASS_SINGLEREG_BYTES) #ifdef TARGET_ARM64 - || ((varTypeIsSIMDVL(GetHfaType(clsHnd))) && (structSize == compVectorTLength))) + || (varTypeIsSIMDVL(GetHfaType(clsHnd)) && (structSize == compVectorTLength)) #endif ) { diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index a208f1e5777f6a..862fdc8f45912a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -866,6 +866,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // Sha1_Arm64 { NI_Illegal, NI_Illegal }, // Sha256_Arm64 { NI_Illegal, NI_Illegal }, // Sve_Arm64 + { FIRST_NI_Vector, LAST_NI_Vector }, #else #error Unsupported platform #endif diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9bb76b0ad038a5..f87cc5f208cef2 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -10,6 +10,24 @@ // clang-format off #ifdef FEATURE_HW_INTRINSICS + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Vector +#define FIRST_NI_Vector NI_Vector_As +HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, CreateScalar, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +#define LAST_NI_Vector NI_Vector_op_Addition + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 23ceed3cde357d..115d6d13b1db60 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -10653,38 +10653,45 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) uint32_t size = getVectorTByteLength(); assert((size == 16) || (size == 32) || (size == 64)); + bool useAgnosticVL = false; +#ifdef TARGET_ARM64 + useAgnosticVL = compExactlyDependsOn(InstructionSet_Sve_Arm64) && (size > 16); +#endif const char* lookupClassName = className; - switch (size) + if (!useAgnosticVL) { - case 16: + switch (size) { - lookupClassName = isVectorT ? "Vector128`1" : "Vector128"; - break; - } + case 16: + { + lookupClassName = isVectorT ? "Vector128`1" : "Vector128"; + break; + } - case 32: - { - lookupClassName = isVectorT ? "Vector256`1" : "Vector256"; - break; - } + case 32: + { + lookupClassName = isVectorT ? "Vector256`1" : "Vector256"; + break; + } - case 64: - { - lookupClassName = isVectorT ? "Vector512`1" : "Vector512"; - break; - } + case 64: + { + lookupClassName = isVectorT ? 
"Vector512`1" : "Vector512"; + break; + } - default: - { - unreached(); + default: + { + unreached(); + } } } const char* lookupMethodName = methodName; - if ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0')) + if (!useAgnosticVL && ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0'))) { if (strncmp(methodName + 2, "Vector", 6) == 0) { diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 36362d6516a544..d3024af276bf9f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -10658,7 +10658,14 @@ void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDes { if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED) { + +#ifdef TARGET_ARM64 +//TODO-VL: Evaluate this + assert(tree->gtHasReg(compiler) || + (tree->OperIs(GT_INTRINSIC) && (tree->AsIntrinsic()->gtIntrinsicName == NI_SIMD_UpperRestore))); +#else assert(tree->gtHasReg(compiler)); +#endif } lsraGetOperandString(tree, mode, operandString, operandStringLength); printf("%-15s =", operandString); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 41ee58117268e5..f3958c8d432ad7 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1525,7 +1525,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, varInterval->isPartiallySpilled = true; pos->skipSaveRestore = blockAlwaysReturn; pos->liveVarUpperSave = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex); - pos->regOptional = forceRegOptional; + pos->regOptional = forceRegOptional; } } } diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 0472047f971c0f..cf2fc0edeeea05 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -359,6 +359,8 @@ static_assert_no_msg(sizeof(simdmask_t) == 8); #if defined(TARGET_XARCH) || defined(TARGET_ARM64) typedef simd64_t simd_t; +#else +typedef simd16_t simd_t; #endif inline bool IsUnaryBitwiseOperation(genTreeOps oper) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 7117ae67c9a047..04a62908e95cc2 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -61,7 +61,8 @@ struct ArgLocDesc case CORINFO_HFA_ELEM_VECTOR64: return 8; case CORINFO_HFA_ELEM_VECTOR128: return 16; #if defined(TARGET_ARM64) - case CORINFO_HFA_ELEM_VECTOR_VL: return g_sve_length; // TODO-VL: Need to return the cached value + case CORINFO_HFA_ELEM_VECTOR256: return 32; // TODO-VL: Need to return the cached value + case CORINFO_HFA_ELEM_VECTOR512: return 64; // TODO-VL: Need to return the cached value #endif default: _ASSERTE(!"Invalid HFA Type"); return 0; } diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index dd8d3ad294af96..d7a73aa57dfd55 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -1788,10 +1788,18 @@ CorInfoHFAElemType MethodTable::GetHFAType() { return CORINFO_HFA_ELEM_VECTOR128; } + else if (vectorSize == 32) + { + return CORINFO_HFA_ELEM_VECTOR256; + } + else if (vectorSize == 64) + { + return CORINFO_HFA_ELEM_VECTOR512; + } else { - assert ((vectorSize % 16) == 0); - return CORINFO_HFA_ELEM_VECTOR_VL; + assert ("Invalid vectorSize"); + return CORINFO_HFA_ELEM_VECTOR128; } } @@ -1905,10 +1913,20 @@ EEClass::CheckForHFA() { fieldHFAType = CORINFO_HFA_ELEM_VECTOR128; } +#ifdef TARGET_ARM64 + else if (thisElemSize == 32) + { + fieldHFAType = CORINFO_HFA_ELEM_VECTOR256; + } + else if (thisElemSize == 64) + { + fieldHFAType = CORINFO_HFA_ELEM_VECTOR512; + } +#endif // 
TARGET_ARM64 else { - assert ((thisElemSize % 16) == 0); - fieldHFAType = CORINFO_HFA_ELEM_VECTOR_VL; + assert(!"Invalid element size"); + fieldHFAType = CORINFO_HFA_ELEM_VECTOR128; } else @@ -1980,9 +1998,12 @@ EEClass::CheckForHFA() case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; - case CORINFO_HFA_ELEM_VECTOR_VL: - elemSize = g_sve_length; //TODO-VL: Need to cache it + case CORINFO_HFA_ELEM_VECTOR256: + elemSize = 32; break; + case CORINFO_HFA_ELEM_VECTOR512: + elemSize = 64; + break; #endif default: // ELEMENT_TYPE_END From 1eb159f96aec95633918eb230ee6b6c727eabe5a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 21 Mar 2025 17:37:00 -0700 Subject: [PATCH 012/120] returning Vectors --- src/coreclr/jit/codegenarm64.cpp | 80 +++++++++++++++++++++++ src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +- src/coreclr/jit/valuenum.cpp | 2 +- 7 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 290ea8d74c41f8..328b4cc42df102 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2500,6 +2500,86 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } break; } + case TYP_SIMD32: + case TYP_SIMD64: + { + // Use scalable registers + if (vecCon->IsAllBitsSet()) + { + // Use Scalable_B because for Ones, it doesn't matter. + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, 0xFF, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (vecCon->IsZero()) + { + // Use Scalable_B because for Zero, it doesn't matter. + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, 0, INS_OPTS_SCALABLE_B); + } + else + { + if (tree->TypeGet() == TYP_SIMD32) + { + simd32_t val = vecCon->gtSimd32Val; + if (ElementsAreSame(val.i32, 8) && + emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i16, 16) && + emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i8, 32) && emitter::isValidSimm<8>(val.i8[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_IMM_BITMASK); + } + else + { + // Get a temp integer register to compute long address. 
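+ // None of the SVE immediate forms matched, so the constant is emitted into the read-only data section (emitSimd32Const) and loaded back with ldr; the temp register is used to form the data-section address.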
+ regNumber addrReg = internalRegisters.GetSingle(tree); + CORINFO_FIELD_HANDLE hnd; + simd32_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd32_t)); + hnd = emit->emitSimd32Const(constValue); + emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + } + } + else + { + simd64_t val = vecCon->gtSimd64Val; + if (ElementsAreSame(val.i32, 16) && emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i16, 32) && + emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i8, 64) && emitter::isValidSimm<8>(val.i8[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else + { + // Get a temp integer register to compute long address. + regNumber addrReg = internalRegisters.GetSingle(tree); + CORINFO_FIELD_HANDLE hnd; + simd64_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); + hnd = emit->emitSimd64Const(constValue); + emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + } + } + } + break; + } default: { diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 91e9ae74229c94..3504d940e42185 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8164,7 +8164,7 @@ CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue) return emitComp->eeFindJitDataOffs(cnum); } -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue) { unsigned cnsSize = 32; @@ -8193,7 +8193,7 @@ CORINFO_FIELD_HANDLE emitter::emitSimd64Const(simd64_t constValue) return emitComp->eeFindJitDataOffs(cnum); } -#endif // TARGET_XARCH +#endif // TARGET_XARCH || TARGET_ARM64 #if defined(FEATURE_MASKED_HW_INTRINSICS) CORINFO_FIELD_HANDLE emitter::emitSimdMaskConst(simdmask_t constValue) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 1a72001b8d297a..c878794d719da5 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2587,7 +2587,7 @@ class emitter #if defined(FEATURE_SIMD) CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue); CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue); CORINFO_FIELD_HANDLE emitSimd64Const(simd64_t constValue); #endif // TARGET_XARCH diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9c26f608965480..d1ac13a9d37828 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -240,7 +240,7 @@ void GenTree::InitNodeSize() // clang-format off GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE; -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#if defined(FEATURE_SIMD) && (defined(TARGET_XARCH) || defined(TARGET_ARM64)) GenTree::s_gtNodeSizes[GT_CNS_VEC] = TREE_NODE_SZ_LARGE; #endif // FEATURE_SIMD && TARGET_XARCH GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index a040bc558350bb..57b0d60a5d5738 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7296,7 +7296,7 @@ struct GenTreeVecCon : public GenTree // buffer 
will cause determinism issues with the compiler. memset(>SimdVal, 0, sizeof(gtSimdVal)); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) assert(sizeof(simd_t) == sizeof(simd64_t)); #else assert(sizeof(simd_t) == sizeof(simd16_t)); diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index f87cc5f208cef2..64210bcde927a0 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -26,7 +26,7 @@ HARDWARE_INTRINSIC(Vector, get_Zero, HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_Vector NI_Vector_op_Addition +#define LAST_NI_Vector NI_Vector_op_Inequality // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 5af515a1d81ec4..895b3c8c571621 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -434,7 +434,7 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc) , m_simd8CnsMap(nullptr) , m_simd12CnsMap(nullptr) , m_simd16CnsMap(nullptr) -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) , m_simd32CnsMap(nullptr) , m_simd64CnsMap(nullptr) #endif // TARGET_XARCH From df7203fbb8cb8c6d8d58cd312845785086824203 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 21 Mar 2025 17:45:29 -0700 Subject: [PATCH 013/120] fix a bug --- src/coreclr/jit/codegenarm64.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 328b4cc42df102..42bacace24c1e0 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3206,7 +3206,17 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } } emitAttr attr = emitActualTypeSize(targetType); - GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired); + if (attr == EA_SCALABLE) + { + //TODO-VL: Should we check the baseType or it doesn't matter because it is just reg->reg move + GetEmitter()->emitIns_Mov(INS_sve_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired, + INS_OPTS_SCALABLE_Q); + } + else + { + GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired); + } + } 
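(A quick aside before the next two standalone fixes, to make the constant-materialization logic above easier to follow. The sketch below is illustration only: ElementsAreSame is re-declared locally as a stand-in for the JIT helper of the same name, the union mirrors simd32_t's layout, and the printed strings merely mimic the instructions the emitter would select. It probes lane uniformity from the narrowest element width up -- the order the later "Support Vector.Create" commit settles on -- and falls back to a constant-pool load when no width is uniform.)

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Local stand-in for the JIT's ElementsAreSame helper: true when every lane
// of the array holds the same value.
template <typename T, size_t N>
static bool ElementsAreSame(const T (&lanes)[N])
{
    for (size_t i = 1; i < N; i++)
    {
        if (lanes[i] != lanes[0])
        {
            return false;
        }
    }
    return true;
}

int main()
{
    // Mirrors simd32_t's union of lane views over the same 32 bytes.
    union
    {
        int8_t  i8[32];
        int16_t i16[16];
        int32_t i32[8];
    } val;

    for (int i = 0; i < 8; i++)
    {
        val.i32[i] = 0x01010101; // every byte is 0x01, so a B-width dup suffices
    }

    if (ElementsAreSame(val.i8))
    {
        printf("dup z0.b, #%d\n", val.i8[0]);
    }
    else if (ElementsAreSame(val.i16))
    {
        printf("dup z0.h, #%d\n", val.i16[0]);
    }
    else if (ElementsAreSame(val.i32))
    {
        printf("dup z0.s, #%d\n", val.i32[0]);
    }
    else
    {
        printf("ldr z0, [constant pool]\n"); // no uniform width: load it
    }
    return 0;
}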
/*********************************************************************************************** From 734aba53acafd040df30536a73ce7c1ad5698 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 21 Mar 2025 19:24:08 -0700 Subject: [PATCH 014/120] standalone fix to generate sve mov instead of NEON mov --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 2751839a89b546..3a52e6371e3fe4 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1857,6 +1857,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Vector_ToScalar: case NI_Vector64_ToScalar: case NI_Vector128_ToScalar: { @@ -2544,8 +2545,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert((targetReg == op2Reg) || (targetReg != op1Reg)); assert((targetReg == op2Reg) || (targetReg != op3Reg)); - GetEmitter()->emitIns_Mov(INS_mov, emitSize, targetReg, op2Reg, - /* canSkip */ true); + GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op2Reg, /* canSkip */ true, opt); GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt, INS_SCALABLE_OPTS_NONE); break; @@ -2561,8 +2561,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(varTypeIsFloating(node->gtType) || varTypeIsSIMD(node->gtType)); assert((targetReg == op2Reg) || (targetReg != op1Reg)); assert((targetReg == op2Reg) || (targetReg != op3Reg)); - GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg, - /* canSkip */ true); + + GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op2Reg, /* canSkip */ true, opt); GetEmitter()->emitInsSve_R_R_R(ins, EA_SCALABLE, targetReg, op1Reg, op3Reg, opt, INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); break; From a71b8de45e56d83ed18013b8622b899ceedbd47a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 24 Mar 2025 14:54:43 -0700 Subject: [PATCH 015/120] standalone fix to generate ldr/str when emit_RR is called --- src/coreclr/jit/emitarm64sve.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 88c0d2d1848cad..06a3d709387da5 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2256,7 +2256,14 @@ void emitter::emitInsSve_R_R(instruction ins, // Thus, MOV is the preferred disassembly. ins = INS_sve_mov; break; - + case INS_sve_ldr: + case INS_sve_str: + { + // We might come here through emitIns_R_R() to emit "ldr Zx, [Xn]" and + // in that case we just generate the immediate-offset variant, where the offset is zero. 
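+ // With a zero immediate this assembles to "ldr/str Zt, [Xn]"; the "#imm, MUL VL" scaling of the full form only matters for nonzero offsets.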
+ emitInsSve_R_R_I(ins, attr, reg1, reg2, 0, opt, sopt); + return; + } default: unreached(); break; From 2e8cfd592705feb1154f5004daaf68b356217675 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 24 Mar 2025 14:55:20 -0700 Subject: [PATCH 016/120] Support Vector.Create --- src/coreclr/jit/codegenarm64.cpp | 16 ++++++---------- src/coreclr/jit/fgbasic.cpp | 1 + src/coreclr/jit/gentree.cpp | 8 ++++++-- src/coreclr/jit/gentree.h | 2 ++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 - src/coreclr/jit/importercalls.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 4 ++++ src/coreclr/jit/morph.cpp | 1 + 9 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 42bacace24c1e0..6240267f5c1b43 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2520,21 +2520,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre if (tree->TypeGet() == TYP_SIMD32) { simd32_t val = vecCon->gtSimd32Val; - if (ElementsAreSame(val.i32, 8) && - emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) + if (ElementsAreSame(val.i8, 32)) { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_IMM_BITMASK); + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B); } - else if (ElementsAreSame(val.i16, 16) && - emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) + else if (ElementsAreSame(val.i16, 16)) { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_IMM_BITMASK); + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H); } - else if (ElementsAreSame(val.i8, 32) && emitter::isValidSimm<8>(val.i8[0])) + else if (ElementsAreSame(val.i32, 8)) { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_IMM_BITMASK); + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S); } else { diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 613a0538c71883..65ea5b5e06de73 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1207,6 +1207,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_ArmBase_Arm64_ReverseElementBits: case NI_ArmBase_LeadingZeroCount: case NI_ArmBase_ReverseElementBits: + case NI_Vector_Create: case NI_Vector64_Create: case NI_Vector64_CreateScalar: case NI_Vector64_CreateScalarUnsafe: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d1ac13a9d37828..5e93a7f49a0ad9 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3892,6 +3892,7 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) case NI_Vector512_CreateScalar: case NI_Vector512_CreateScalarUnsafe: #elif defined(TARGET_ARM64) + case NI_Vector_Create: case NI_Vector64_Create: case NI_Vector64_CreateScalar: case NI_Vector64_CreateScalarUnsafe: @@ -22742,7 +22743,6 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, unreached(); } } - return vecCon; } @@ -22766,7 +22766,11 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, hwIntrinsicID = NI_Vector256_Create; } #elif defined(TARGET_ARM64) - if (simdSize == 8) + if ((simdSize == 64) || (simdSize == 32)) + { + hwIntrinsicID = NI_Vector_Create; + } + else if (simdSize == 8) { hwIntrinsicID 
= NI_Vector64_Create; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 57b0d60a5d5738..3e9e9730d87dcb 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6668,6 +6668,7 @@ struct GenTreeVecCon : public GenTree case NI_Vector256_CreateScalarUnsafe: case NI_Vector512_CreateScalarUnsafe: #elif defined(TARGET_ARM64) + case NI_Vector_Create: case NI_Vector64_Create: case NI_Vector64_CreateScalar: case NI_Vector64_CreateScalarUnsafe: @@ -9659,6 +9660,7 @@ inline bool GenTree::IsVectorCreate() const case NI_Vector256_Create: case NI_Vector512_Create: #elif defined(TARGET_ARMARCH) + case NI_Vector_Create: case NI_Vector64_Create: #endif return true; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5313e4201756f2..9e593fb3183c2c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1073,6 +1073,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Create: case NI_Vector64_Create: case NI_Vector128_Create: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 64210bcde927a0..d91a617c00296d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -19,7 +19,6 @@ #define FIRST_NI_Vector NI_Vector_As HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector, CreateScalar, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 115d6d13b1db60..1e697a43f1352a 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3240,7 +3240,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, // handled by the AltJit, so limit only the platform specific intrinsics assert((LAST_NI_Vector128 + 1) == FIRST_NI_AdvSimd); - if (ni < LAST_NI_Vector128) + if ((ni < LAST_NI_Vector128) || ((ni >= FIRST_NI_Vector) && (ni < LAST_NI_Vector))) #else #error Unsupported platform #endif diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 963d0c4d1fbb7a..e90750f937e9fa 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp 
@@ -1612,6 +1612,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { +#ifdef TARGET_ARM64 + case NI_Vector_Create: +#endif case NI_Vector64_Create: case NI_Vector128_Create: case NI_Vector64_CreateScalar: @@ -2016,6 +2019,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { assert((node->GetHWIntrinsicId() == NI_Vector64_Create) || (node->GetHWIntrinsicId() == NI_Vector128_Create) || + (node->GetHWIntrinsicId() == NI_Vector_Create) || (node->GetHWIntrinsicId() == NI_Vector64_CreateScalar) || (node->GetHWIntrinsicId() == NI_Vector128_CreateScalar) || (node->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe) || diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 53b9bc8166d44b..29c67adb547dbd 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9224,6 +9224,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { #if defined(TARGET_ARM64) + case NI_Vector_Create: case NI_Vector64_Create: #endif // TARGET_ARM64 case NI_Vector128_Create: From 1d74f822299e793bbb824059d477a1c3f412e849 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 Mar 2025 09:47:40 -0700 Subject: [PATCH 017/120] Do not do sve_mov for scalar variant --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 3a52e6371e3fe4..8abba971d82524 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2545,7 +2545,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert((targetReg == op2Reg) || (targetReg != op1Reg)); assert((targetReg == op2Reg) || (targetReg != op3Reg)); - GetEmitter()->emitIns_Mov(INS_sve_mov, EA_SCALABLE, targetReg, op2Reg, /* canSkip */ true, opt); + GetEmitter()->emitIns_Mov(INS_mov, emitSize, targetReg, op2Reg, + /* canSkip */ true); GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt, INS_SCALABLE_OPTS_NONE); break; From 699d2e1255db76ec268df7b2871d49aefca4c9e4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 Mar 2025 10:49:13 -0700 Subject: [PATCH 018/120] Support Vector.As --- src/coreclr/jit/fgbasic.cpp | 1 + src/coreclr/jit/hwintrinsicarm64.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 65ea5b5e06de73..4d412501e0f7a9 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1439,6 +1439,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed #if defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_ARM64) + case NI_Vector_As: case NI_Vector64_As: case NI_Vector64_AsByte: case NI_Vector64_AsDouble: diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9e593fb3183c2c..cc2b15469a4311 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -754,6 +754,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_As: case NI_Vector64_As: case NI_Vector64_AsByte: case NI_Vector64_AsDouble: From 7f8ff24f0c434c6cb321cc67e26e1ebcc7a76c17 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 Mar 2025 11:19:58 -0700 Subject: [PATCH 019/120] Support Vector.Abs --- src/coreclr/jit/gentree.cpp | 24 ++++++++++++++++++++- 
src/coreclr/jit/gentree.h                   |  4 ++++
 src/coreclr/jit/hwintrinsicarm64.cpp        |  1 +
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp |  2 +-
 src/coreclr/jit/hwintrinsiclistarm64sve.h   | 17 ++++++++-------
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 5e93a7f49a0ad9..32b90120fecf01 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20857,7 +20857,6 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
    }
 #elif defined(TARGET_ARM64)
    NamedIntrinsic intrinsic = NI_AdvSimd_Abs;
-
    if (simdBaseType == TYP_DOUBLE)
    {
        intrinsic = (simdSize == 8) ? NI_AdvSimd_AbsScalar : NI_AdvSimd_Arm64_Abs;
@@ -20867,6 +20866,8 @@
        intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_AbsScalar : NI_AdvSimd_Arm64_Abs;
    }

+   intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+
    assert(intrinsic != NI_Illegal);
    return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 #else
@@ -28784,6 +28785,27 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
    }
 }

+
+//------------------------------------------------------------------------------
+// GetScalableHWIntrinsicId: Returns the SVE equivalent of the given intrinsic ID, if applicable
+//
+NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id)
+{
+    // TODO-VL: Convert this into a single check when we introduce TYP_SIMDVL
+    if ((simdType == TYP_SIMD16) || (simdType == TYP_SIMD8))
+    {
+        return id;
+    }
+    switch (id)
+    {
+        case NI_AdvSimd_Abs:
+        case NI_AdvSimd_Arm64_Abs:
+            return NI_Sve_Abs;
+        default:
+            return id;
+    }
+}
+
 //------------------------------------------------------------------------------
 // GetHWIntrinsicIdForUnOp: Returns intrinsic ID based on the oper, base type, and simd size
 //
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 3e9e9730d87dcb..36df2a253756a4 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -6571,6 +6571,10 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

    static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);

+#ifdef TARGET_ARM64
+    static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id);
+#endif
+
    static NamedIntrinsic GetHWIntrinsicIdForUnOp(
        Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar);

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index cc2b15469a4311..35998f36292894 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -697,6 +697,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

    switch (intrinsic)
    {
+       case NI_Vector_Abs:
        case NI_Vector64_Abs:
        case NI_Vector128_Abs:
        {
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 8abba971d82524..4b5713b628f587 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -1857,7 +1857,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
            break;
        }

-       case NI_Vector_ToScalar:
+       //case NI_Vector_ToScalar:
        case NI_Vector64_ToScalar:
        case NI_Vector128_ToScalar:
        {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index d91a617c00296d..4a00deb7e14fcf 100644
---
a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -16,16 +16,17 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector -#define FIRST_NI_Vector NI_Vector_As +#define FIRST_NI_Vector NI_Vector_Abs +HARDWARE_INTRINSIC(Vector, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_Vector NI_Vector_op_Inequality +//HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) +//HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +//HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +//HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +//HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +//HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +#define LAST_NI_Vector NI_Vector_Create // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags From 3d19d51aa294c6cd61f2d0fbd1cffbbe06cfcb78 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 Mar 2025 12:03:23 -0700 Subject: [PATCH 020/120] Support Vector.Add --- src/coreclr/jit/gentree.cpp | 45 +++++++++++++++-------- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 2 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 5 ++- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 32b90120fecf01..8aa3c1953b1cda 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28789,21 +28789,28 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty //------------------------------------------------------------------------------ // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable // -NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id) +NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) { - // TODO-VL: Convert this in single check when we introduce TYP_SIMDVL - if ((simdType == TYP_SIMD16) || (simdType == TYP_SIMD8)) + NamedIntrinsic sveId = id; + if (simdSize > 16) { - return id; - } - switch (id) - { - case NI_AdvSimd_Abs: - case NI_AdvSimd_Arm64_Abs: - return NI_Sve_Abs; - default: - return id; + switch (id) + { + case NI_AdvSimd_Abs: + case NI_AdvSimd_Arm64_Abs: + sveId = NI_Sve_Abs; + break; + case NI_AdvSimd_Add: + case NI_AdvSimd_Arm64_Add: + sveId = NI_Sve_Add; + break; + default: + sveId = id; + } } + // Make sure if we are using VL SIMD, we are not generating AdvSimd/NEON intrinsics + assert((simdSize <= 16) || (sveId < FIRST_NI_AdvSimd) || (sveId > LAST_NI_AdvSimd)); + return sveId; } //------------------------------------------------------------------------------ @@ -28931,6 +28938,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); +#if defined(TARGET_ARM64) + assert(!isScalar || (simdSize == 8)); + 
assert(!isScalar || varTypeIsFloating(simdBaseType)); + assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); + assert((simdSize <= 16) || (simdSize == Compiler::compVectorTLength)); +#else if (simdSize == 64) { assert(!isScalar); @@ -28943,13 +28956,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { -#if defined(TARGET_ARM64) - assert(!isScalar || (simdSize == 8)); -#endif // TARGET_ARM64 - assert(!isScalar || varTypeIsFloating(simdBaseType)); assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } +#endif // TARGET_ARM64 NamedIntrinsic id = NI_Illegal; @@ -29683,6 +29693,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } } +#ifdef TARGET_ARM64 + id = GetScalableHWIntrinsicId(simdSize, id); +#endif return id; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 36df2a253756a4..f3fd5d59964cb7 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6572,7 +6572,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); #ifdef TARGET_ARM64 - static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id); + static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id); #endif static NamedIntrinsic GetHWIntrinsicIdForUnOp( diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 35998f36292894..37bc413c27c64b 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -707,6 +707,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Add: + case NI_Vector_op_Addition: case NI_Vector64_op_Addition: case NI_Vector128_op_Addition: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 4a00deb7e14fcf..0f975628aebc63 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -18,15 +18,16 @@ // Vector #define FIRST_NI_Vector NI_Vector_Abs HARDWARE_INTRINSIC(Vector, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Add, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
-//HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 //HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
 //HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
-#define LAST_NI_Vector NI_Vector_Create
+#define LAST_NI_Vector NI_Vector_op_Addition

 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 // ISA              Function name              SIMD size     NumArg     Instructions     Category     Flags

From 70c09f9ca51413c22906065cdc8b2ef5b18f774d Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 25 Mar 2025 12:28:21 -0700
Subject: [PATCH 021/120] Introduce VariableVectorLength env variable

---
 src/coreclr/jit/compiler.cpp      | 3 ++-
 src/coreclr/jit/jitconfigvalues.h | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 48ca8786dc2239..7001eda4e4452f 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -492,7 +492,8 @@ Compiler::Compiler(ArenaAllocator* arena,
    info.compIsVarArgs = false;

 #if defined(TARGET_ARM64)
-   Compiler::compVectorTLength = 32; // TODO-VL: This should come from runtime itself
+   // TODO-VL: This should come from the runtime itself and then be overridden by this environment variable
+   Compiler::compVectorTLength = ReinterpretHexAsDecimal(JitConfig.VariableVectorLength());
    //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength;
    //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength;
    //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE;
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 82e33b64c1c051..d8585f95dba254 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -382,6 +382,9 @@ CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0)
 #endif

 RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default.
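A note on how the new knob below is consumed (a sketch of the expected decoding, assuming the JIT's usual convention that config integers are parsed as hex, which is why the compiler.cpp hunk above runs the value through ReinterpretHexAsDecimal):

    // Hypothetical invocation: DOTNET_VariableVectorLength=32
    //   JitConfig parses the string as hex   -> 0x32
    //   ReinterpretHexAsDecimal(0x32)        -> 32
    //   Compiler::compVectorTLength          -> 32 bytes (a 256-bit vector length)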
+#if defined(TARGET_ARM64)
+RELEASE_CONFIG_INTEGER(VariableVectorLength, "VariableVectorLength", 0x16) // The preferred Vector<T> length (VL), in bytes, written in decimal digits (decoded with ReinterpretHexAsDecimal); the default 0x16 decodes to 16 bytes
+#endif

 //
 // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h

From 53df3d7e202dcc88c379f13912e4ed697d1d55ef Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 25 Mar 2025 15:12:49 -0700
Subject: [PATCH 022/120] Support Vector.AndNot

---
 src/coreclr/jit/gentree.cpp               | 20 ++++++++++++++++----
 src/coreclr/jit/hwintrinsicarm64.cpp      |  1 +
 src/coreclr/jit/hwintrinsiclistarm64sve.h |  1 +
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 8aa3c1953b1cda..3e7dc925095300 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -28804,6 +28804,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
            case NI_AdvSimd_Arm64_Add:
                sveId = NI_Sve_Add;
                break;
+           case NI_AdvSimd_And:
+               sveId = NI_Sve_And;
+               break;
+           case NI_AdvSimd_Not:
+               sveId = NI_Sve_Not;
+               break;
            default:
                sveId = id;
@@ -28835,6 +28841,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
    assert(varTypeIsArithmetic(simdBaseType));
    assert(varTypeIsSIMD(simdType));

+#if defined(TARGET_ARM64)
+   assert(!isScalar || (simdSize == 8));
+   assert(!isScalar || varTypeIsFloating(simdBaseType));
+   assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
+   assert((simdSize <= 16) || (simdSize == Compiler::compVectorTLength));
+#else
    if (simdSize == 64)
    {
        assert(!isScalar);
@@ -28847,13 +28859,10 @@
    }
    else
    {
-#if defined(TARGET_ARM64)
-       assert(!isScalar || (simdSize == 8));
-#endif // TARGET_ARM64
-
        assert(!isScalar || varTypeIsFloating(simdBaseType));
        assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
    }
+#endif

    assert(op1 != nullptr);
    assert(op1->TypeIs(simdType));
@@ -28903,6 +28912,9 @@
        }
    }

+#if defined(TARGET_ARM64)
+   id = GetScalableHWIntrinsicId(simdSize, id);
+#endif
    return id;
 }

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 37bc413c27c64b..2437cb375fd90b 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -722,6 +722,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
        }

        case NI_AdvSimd_BitwiseClear:
+       case NI_Vector_AndNot:
        case NI_Vector64_AndNot:
        case NI_Vector128_AndNot:
        {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 0f975628aebc63..2aca6b0ebb586e 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -19,6 +19,7 @@
 #define FIRST_NI_Vector NI_Vector_Abs
 HARDWARE_INTRINSIC(Vector, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Add, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, AndNot, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid,
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) From b1d4ce96fd9543b1ec7dbf1eba74c63b125f64fc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 Mar 2025 20:29:55 -0700 Subject: [PATCH 023/120] Support Vector.As* --- src/coreclr/jit/fgbasic.cpp | 12 ++++++++++++ src/coreclr/jit/hwintrinsicarm64.cpp | 12 ++++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 13 +++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 4d412501e0f7a9..de56c5a1f7dc14 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1440,6 +1440,18 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed #if defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_ARM64) case NI_Vector_As: + case NI_Vector_AsVectorByte: + case NI_Vector_AsVectorDouble: + case NI_Vector_AsVectorInt16: + case NI_Vector_AsVectorInt32: + case NI_Vector_AsVectorInt64: + case NI_Vector_AsVectorNInt: + case NI_Vector_AsVectorNUInt: + case NI_Vector_AsVectorSByte: + case NI_Vector_AsVectorSingle: + case NI_Vector_AsVectorUInt16: + case NI_Vector_AsVectorUInt32: + case NI_Vector_AsVectorUInt64: case NI_Vector64_As: case NI_Vector64_AsByte: case NI_Vector64_AsDouble: diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 2437cb375fd90b..1cef845aec7062 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -759,6 +759,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_As: + case NI_Vector_AsVectorByte: + case NI_Vector_AsVectorDouble: + case NI_Vector_AsVectorInt16: + case NI_Vector_AsVectorInt32: + case NI_Vector_AsVectorInt64: + case NI_Vector_AsVectorNInt: + case NI_Vector_AsVectorNUInt: + case NI_Vector_AsVectorSByte: + case NI_Vector_AsVectorSingle: + case NI_Vector_AsVectorUInt16: + case NI_Vector_AsVectorUInt32: + case NI_Vector_AsVectorUInt64: case NI_Vector64_As: case NI_Vector64_AsByte: case NI_Vector64_AsDouble: diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 2aca6b0ebb586e..05a4278659cf55 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -21,6 +21,19 @@ HARDWARE_INTRINSIC(Vector, Abs, HARDWARE_INTRINSIC(Vector, Add, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, AndNot, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, As, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) 
+HARDWARE_INTRINSIC(Vector, AsVectorByte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorNInt, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorNUInt, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorSByte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, AsVectorUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, 
get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 29564cbc9823d5b6f72131b8fdd23611ab81286a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 26 Mar 2025 09:59:33 -0700 Subject: [PATCH 024/120] Support Vector.BitwiseAnd/BitwiseOr --- src/coreclr/jit/gentree.cpp | 3 +++ src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 7 ++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3e7dc925095300..4c8cfae86e014f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28810,6 +28810,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Not: sveId = NI_Sve_Not; break; + case NI_AdvSimd_Or: + sveId = NI_Sve_Or; + break; default: sveId = id; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 1cef845aec7062..c8bd2d2cedf666 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -929,6 +929,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_BitwiseAnd: case NI_Vector64_op_BitwiseAnd: case NI_Vector128_op_BitwiseAnd: { @@ -941,6 +942,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_BitwiseOr: case NI_Vector64_op_BitwiseOr: case NI_Vector128_op_BitwiseOr: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 05a4278659cf55..7a3acbf8958d19 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -33,15 +33,20 @@ HARDWARE_INTRINSIC(Vector, AsVectorSingle, HARDWARE_INTRINSIC(Vector, AsVectorUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, AsVectorUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, AsVectorUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector, BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) + //HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) //HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_Vector NI_Vector_op_Addition +#define LAST_NI_Vector NI_Vector_op_BitwiseOr // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags From 45ab7b98040d8d81ce10a26068567e2aa3adfbd2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 26 Mar 2025 10:40:52 -0700 Subject: [PATCH 025/120] Support Vector.ConvertTo* --- src/coreclr/jit/gentree.cpp | 24 ++++++++++++++++++++++- src/coreclr/jit/hwintrinsicarm64.cpp | 10 ++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 12 ++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4c8cfae86e014f..2cdd285be89559 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21932,7 +21932,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, unreached(); } #elif defined(TARGET_ARM64) - assert((simdSize == 8) || (simdSize == 16)); + assert((simdSize == 8) || (simdSize == 16) || (simdSize == compVectorTLength)); switch (simdSourceBaseJitType) { @@ -21989,6 +21989,10 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 +#if defined(TARGET_ARM64) + hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, hwIntrinsicID); 
+#endif + assert(hwIntrinsicID != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize); } @@ -28807,6 +28811,24 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_And: sveId = NI_Sve_And; break; + case NI_AdvSimd_Arm64_ConvertToDouble: + sveId = NI_Sve_ConvertToDouble; + break; + case NI_AdvSimd_ConvertToSingle: + sveId = NI_Sve_ConvertToSingle; + break; + case NI_AdvSimd_ConvertToInt32RoundToZero: + sveId = NI_Sve_ConvertToInt32; + break; + case NI_AdvSimd_ConvertToUInt32RoundToZero: + sveId = NI_Sve_ConvertToUInt32; + break; + case NI_AdvSimd_Arm64_ConvertToInt64RoundToZero: + sveId = NI_Sve_ConvertToInt64; + break; + case NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero: + sveId = NI_Sve_ConvertToUInt64; + break; case NI_AdvSimd_Not: sveId = NI_Sve_Not; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c8bd2d2cedf666..e0011534f1c038 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -984,6 +984,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToDouble: case NI_Vector64_ConvertToDouble: case NI_Vector128_ConvertToDouble: { @@ -997,6 +998,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToInt32Native: case NI_Vector64_ConvertToInt32Native: case NI_Vector128_ConvertToInt32Native: { @@ -1007,6 +1009,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, FALLTHROUGH; } + case NI_Vector_ConvertToInt32: case NI_Vector64_ConvertToInt32: case NI_Vector128_ConvertToInt32: { @@ -1018,6 +1021,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToInt64Native: case NI_Vector64_ConvertToInt64Native: case NI_Vector128_ConvertToInt64Native: { @@ -1028,6 +1032,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, FALLTHROUGH; } + case NI_Vector_ConvertToInt64: case NI_Vector64_ConvertToInt64: case NI_Vector128_ConvertToInt64: { @@ -1039,6 +1044,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToSingle: case NI_Vector64_ConvertToSingle: case NI_Vector128_ConvertToSingle: { @@ -1050,6 +1056,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToUInt32Native: case NI_Vector64_ConvertToUInt32Native: case NI_Vector128_ConvertToUInt32Native: { @@ -1060,6 +1067,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, FALLTHROUGH; } + case NI_Vector_ConvertToUInt32: case NI_Vector64_ConvertToUInt32: case NI_Vector128_ConvertToUInt32: { @@ -1071,6 +1079,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConvertToUInt64Native: case NI_Vector64_ConvertToUInt64Native: case NI_Vector128_ConvertToUInt64Native: { @@ -1081,6 +1090,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, FALLTHROUGH; } + case NI_Vector_ConvertToUInt64: case NI_Vector64_ConvertToUInt64: case NI_Vector128_ConvertToUInt64: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7a3acbf8958d19..1101ed42a1a06c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -36,6 +36,18 @@ HARDWARE_INTRINSIC(Vector, AsVectorUInt64, HARDWARE_INTRINSIC(Vector, BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector, ConvertToDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt32Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToInt64Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt32Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + + HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)

From 3837693b2dc96b57f70a10f5b4997ac1d57b7a94 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 27 Mar 2025 16:21:35 -0700
Subject: [PATCH 026/120] Add CreateFalseMaskAll intrinsic

---
 src/coreclr/jit/hwintrinsicarm64.cpp      | 17 ++++++++++++++++-
 src/coreclr/jit/hwintrinsiclistarm64sve.h |  1 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index e0011534f1c038..73a243d0bbf07b 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -3269,7 +3269,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
 }

 //------------------------------------------------------------------------
-// gtNewSimdEmbeddedMaskNode: Create an embedded mask
+// gtNewSimdAllTrueMaskNode: Create an AllTrue mask node
 //
 // Arguments:
 //    simdBaseJitType -- the base jit type of the nodes being masked
@@ -3283,4 +3283,19 @@
    return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
 }

+//------------------------------------------------------------------------
+// gtNewSimdAllFalseMaskNode: Create an AllFalse mask node
+//
+// Arguments:
+//    simdBaseJitType -- the base jit type of the nodes being masked
+//    simdSize        -- the simd size of the nodes being masked
+//
+// Return Value:
+//    The mask
+//
+GenTree* Compiler::gtNewSimdAllFalseMaskNode(CorInfoType simdBaseJitType, unsigned simdSize)
+{
+    return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskAll, simdBaseJitType, simdSize);
+}
+
 #endif // FEATURE_HW_INTRINSICS
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 1101ed42a1a06c..979f3fc3e65d72 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -364,6 +364,7 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar
 HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
 HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable)
 HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
+HARDWARE_INTRINSIC(Sve, CreateFalseMaskAll, -1, 0, {INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse, INS_sve_pfalse}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)

 // Scalar variants of Saturating*By*BitElementCount. There are no 8bit versions, as the generic version is scalar only.
 HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)

From ca1675c37fdb63942592dba8295ce95010201e76 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 27 Mar 2025 21:31:01 -0700
Subject: [PATCH 027/120] Temporary fix for scratch register size calculation. Need to revisit

---
 src/coreclr/jit/regset.cpp | 17 +++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index 3d9354b040f48e..4a750301fa39b9 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -604,8 +604,18 @@ var_types RegSet::tmpNormalizeType(var_types type)
    {
        type = TYP_SIMD16;
    }
+
+#if defined(TARGET_ARM64)
+   if (type == TYP_SIMD32)
+   {
+       // TODO-VL: temporary workaround to allow scalable registers
+       type = TYP_SIMD16;
+   }
+#endif
+
 #endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT)

+
    return type;
 }

@@ -682,6 +692,13 @@ void RegSet::tmpPreAllocateTemps(var_types type, unsigned count)
    assert(type == tmpNormalizeType(type));
    unsigned size = genTypeSize(type);

+#ifdef TARGET_ARM64
+   if (type == TYP_SIMD32)
+   {
+       size = 16; // SIMD registers overlap with SVE registers
+   }
+#endif
+
    // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1)
    noway_assert(size >= sizeof(int));

From 7774e079d41632794ed1f232a5e408db3c94f37a Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 27 Mar 2025 21:34:19 -0700
Subject: [PATCH 028/120] Fix to squash in 9542e9cd047

---
 src/coreclr/jit/compiler.h | 1 +
 src/coreclr/jit/gentree.h  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 186235f1565395..c7650f99116006 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3143,6 +3143,7 @@ class Compiler

 #if defined(TARGET_ARM64)
    GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
+   GenTree* gtNewSimdAllFalseMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
 #endif

    GenTree* gtNewSimdBinOpNode(genTreeOps op,
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index f3fd5d59964cb7..b3eebb450bb614 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -9758,7 +9758,8 @@ inline bool GenTree::IsMaskZero() const
        assert(op1->OperIsHWIntrinsic());
        id = op1->AsHWIntrinsic()->GetHWIntrinsicId();
    }
-   return ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64));
+   return ((id == NI_Sve_CreateFalseMaskAll) ||
+           ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64)));
 }
 #endif

From c170a7ef4ffc52dee4e4a5b0fd2429c459ba359c Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 28 Mar 2025 11:09:33 -0700
Subject: [PATCH 029/120] Support Vector.Equals*, GreaterThan*, LessThan*

---
 src/coreclr/jit/assertionprop.cpp         |   2 +
 src/coreclr/jit/compiler.h                |   3 +-
 src/coreclr/jit/gentree.cpp               | 213 ++++++++++++++++++++--
 src/coreclr/jit/hwintrinsic.cpp           |   6 +
 src/coreclr/jit/hwintrinsicarm64.cpp      |  16 ++
 src/coreclr/jit/hwintrinsiclistarm64sve.h |  25 ++-
diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 38131a8c5f2b76..d77e8f170dc1fe 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -296,6 +296,8 @@ bool IntegralRange::Contains(int64_t value) const
             // Example: IntCns = 42 gives [0..127] with a non-precise range, [42,42] with a precise range.
             return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ByteMax};
 #elif defined(TARGET_ARM64)
+        case NI_Vector_op_Equality:
+        case NI_Vector_op_Inequality:
         case NI_Vector64_op_Equality:
         case NI_Vector64_op_Inequality:
         case NI_Vector128_op_Equality:

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index c7650f99116006..d1228417cde2f3 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3161,7 +3161,8 @@ class Compiler
                                 GenTree*    op1,
                                 GenTree*    op2,
                                 CorInfoType simdBaseJitType,
-                                unsigned    simdSize);
+                                unsigned    simdSize
+                                ARM64_ARG(bool wrapInCvtm = true));

     GenTree* gtNewSimdCmpOpAllNode(genTreeOps op,
                                    var_types  type,

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 2cdd285be89559..a777d4b01defaf 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -22031,8 +22031,12 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type,
 }
 #endif // FEATURE_MASKED_HW_INTRINSICS

-GenTree* Compiler::gtNewSimdCmpOpNode(
-    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps  op,
+                                      var_types   type,
+                                      GenTree*    op1,
+                                      GenTree*    op2,
+                                      CorInfoType simdBaseJitType,
+                                      unsigned    simdSize ARM64_ARG(bool wrapInCvtm))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());

@@ -22056,7 +22060,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
     if (intrinsic != NI_Illegal)
     {
 #if defined(FEATURE_MASKED_HW_INTRINSICS)
-        if (lookupType != type)
+        if (wrapInCvtm && (lookupType != type))
         {
             assert(varTypeIsMask(lookupType));
             GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize);

@@ -22417,7 +22421,41 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
 #elif defined(TARGET_ARM64)
         case GT_EQ:
         {
-            intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
+            if (simdSize == 8)
+            {
+                intrinsic = NI_Vector64_op_Equality;
+            }
+            else if (simdSize == 16)
+            {
+                intrinsic = NI_Vector128_op_Equality;
+            }
+            else
+            {
+                assert(simdSize > 16);
+                intrinsic = NI_Vector_op_Equality;
+                GenTree* cmpResult =
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));

+                // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, having `1` for lanes for which
+                // `a CC b` is true. For the `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`,
+                // it means `ALL` lanes satisfy the CC condition and hence we can return true. So the operations
+                // will be:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      r1 = CNTP(p1)
+                //      r2 = CNT{B,H,W,D} // only for NativeAOT. For JIT, this is a constant
+                //      cmp r1, r2
+                //
+                // It can also be done without having to find out VL using CNT{B,H,W,D}, using something like:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      p2 = SVE_NOT(p1)
+                //      r1 = CNTP(p2)
+                //      if r1 == 0 return true else false
+                //
+                // However, the NOT() operation only operates on the "byte" variant, i.e. `p1.B`, while the result
+                // of `p1` from `SVE_CMP_CC` can be of other variants like `p1.S` or `p1.D`, etc.
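The two strategies in the comment above are interchangeable; a quick scalar check of the identity they rely on, (CNTP(p1) == VL) <=> (CNTP(NOT(p1)) == 0) (a sketch only, not JIT code):

#include <cassert>
#include <cstddef>

// All lanes of a predicate are active exactly when its complement has
// no active lanes.
bool AllLanesActive(const bool* p, size_t laneCount)
{
    size_t active   = 0;
    size_t inactive = 0;
    for (size_t i = 0; i < laneCount; i++)
    {
        p[i] ? active++ : inactive++;
    }
    assert((active == laneCount) == (inactive == 0));
    return active == laneCount;
}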
+                GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                               simdBaseJitType, simdSize);
+                op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+            }
             break;
         }

@@ -22437,9 +22475,37 @@
             {
                 intrinsic = NI_Vector128_op_Equality;
             }
+            if (simdSize > 16)
+            {
+                GenTree* cmpResult =
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));

-            op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
-            op2 = gtNewAllBitsSetConNode(simdType);
+                // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, having `1` for lanes for which
+                // `a CC b` is true. For the `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`,
+                // it means `ALL` lanes satisfy the CC condition and hence we can return true. So the operations
+                // will be:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      r1 = CNTP(p1)
+                //      r2 = CNT{B,H,W,D} // only for NativeAOT. For JIT, this is a constant
+                //      cmp r1, r2
+                //
+                // It can also be done without having to find out VL using CNT{B,H,W,D}, using something like:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      p2 = SVE_NOT(p1)
+                //      r1 = CNTP(p2)
+                //      if r1 == 0 return true else false
+                //
+                // However, the NOT() operation only operates on the "byte" variant, i.e. `p1.B`, while the result
+                // of `p1` from `SVE_CMP_CC` can be of other variants like `p1.S` or `p1.D`, etc.
+                GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                               simdBaseJitType, simdSize);
+                op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+            }
+            else
+            {
+                op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
+                op2 = gtNewAllBitsSetConNode(simdType);
+            }

             if (simdBaseType == TYP_FLOAT)
             {
@@ -22463,6 +22529,10 @@
         }
     }

+#if defined(TARGET_ARM64)
+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+#endif
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
 }

@@ -22567,8 +22637,29 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
             intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;

-            op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
-            op2 = gtNewZeroConNode(simdType);
+            if (simdSize > 16)
+            {
+                GenTree* cmpResult =
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+
+                // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, having `1` for lanes for which
+                // `a CC b` is true. For the `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`,
+                // it means `SOME` lanes satisfy the CC condition and hence we can return true.
+                // So the operations will be:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      r1 = CNTP(p1)
+                //      if r1 != 0 return true else false
+
+                GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                               simdBaseJitType, simdSize);
+
+                op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+            }
+            else
+            {
+                op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
+                op2 = gtNewZeroConNode(simdType);
+            }

             if (simdBaseType == TYP_FLOAT)
             {
@@ -22585,7 +22676,36 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(

         case GT_NE:
         {
-            intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
+            if (simdSize == 8)
+            {
+                intrinsic = NI_Vector64_op_Inequality;
+            }
+            else if (simdSize == 16)
+            {
+                intrinsic = NI_Vector128_op_Inequality;
+            }
+            else
+            {
+                assert(simdSize > 16);
+
+                intrinsic = NI_Vector_op_Inequality;
+
+                GenTree* cmpResult =
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+
+                // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, having `1` for lanes for which
+                // `a CC b` is true. For the `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`,
+                // it means `SOME` lanes satisfy the CC condition and hence we can return true. So the operations
+                // will be:
+                //      p1 = SVE_CMP_CC(a, b)
+                //      r1 = CNTP(p1)
+                //      if r1 != 0 return true else false
+
+                GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                               simdBaseJitType, simdSize);
+
+                op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+            }
             break;
         }
 #else
@@ -22598,6 +22718,10 @@
         }
     }

+#if defined(TARGET_ARM64)
+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+#endif
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
 }

@@ -28789,7 +28913,6 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
     }
 }

-
 //------------------------------------------------------------------------------
 // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable
 //
@@ -28811,6 +28934,26 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
         case NI_AdvSimd_And:
             sveId = NI_Sve_And;
             break;
+        case NI_AdvSimd_CompareEqual:
+        case NI_AdvSimd_Arm64_CompareEqual:
+            sveId = NI_Sve_CompareEqual;
+            break;
+        case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
+        case NI_AdvSimd_CompareGreaterThanOrEqual:
+            sveId = NI_Sve_CompareGreaterThanOrEqual;
+            break;
+        case NI_AdvSimd_Arm64_CompareGreaterThan:
+        case NI_AdvSimd_CompareGreaterThan:
+            sveId = NI_Sve_CompareGreaterThan;
+            break;
+        case NI_AdvSimd_Arm64_CompareLessThanOrEqual:
+        case NI_AdvSimd_CompareLessThanOrEqual:
+            sveId = NI_Sve_CompareLessThanOrEqual;
+            break;
+        case NI_AdvSimd_Arm64_CompareLessThan:
+        case NI_AdvSimd_CompareLessThan:
+            sveId = NI_Sve_CompareLessThan;
+            break;
         case NI_AdvSimd_Arm64_ConvertToDouble:
             sveId = NI_Sve_ConvertToDouble;
             break;
@@ -28835,6 +28978,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
         case NI_AdvSimd_Or:
             sveId = NI_Sve_Or;
             break;
+        case NI_Vector128_op_Equality:
+            sveId = NI_Vector_op_Equality;
+            break;
+        case NI_Vector128_op_Inequality:
+            sveId = NI_Vector_op_Inequality;
+            break;
         default:
             sveId = id;
     }
@@ -29774,16 +29923,20 @@
NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, if (varTypeIsMask(type)) { assert(!isScalar); +#if defined(TARGET_XARCH) assert(comp->canUseEvexEncodingDebugOnly()); +#endif } +#if !defined(TARGET_ARM64) else if (simdSize == 32) { assert(!isScalar); assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); } +#endif // !TARGET_ARM64 else { - assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16)); + assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == Compiler::compVectorTLength)); #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); @@ -30082,6 +30235,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); id = isScalar ? NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; } +#elif defined(TARGET_ARM64) + if (simdSize > 16) + { + id = NI_Sve_CompareNotEqualTo; + } #endif // TARGET_XARCH break; } @@ -30092,6 +30250,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } } +#if defined(TARGET_ARM64) + id = GetScalableHWIntrinsicId(simdSize, id); +#endif + return id; } @@ -30161,7 +30323,28 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp( unreached(); } } -#endif // TARGET_XARCH +#elif defined(TARGET_ARM64) + switch (oper) + { + case GT_EQ: + case GT_GE: + case GT_LE: + case GT_NE: + case GT_GT: + case GT_LT: + { + if (simdSize > 16) + { + lookupType = TYP_MASK; + } + break; + } + default: + { + unreached(); + } + } +#endif // TARGET_XARCH || TARGET_ARM64 return lookupType; } @@ -30190,6 +30373,8 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec switch (gtHWIntrinsicId) { #if defined(TARGET_ARM64) + case NI_Vector_op_Equality: + case NI_Vector_op_Inequality: case NI_Vector64_op_Equality: case NI_Vector64_op_Inequality: #endif // TARGET_ARM64 @@ -32032,6 +32217,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_op_Equality: #if defined(TARGET_ARM64) + case NI_Vector_op_Equality: case NI_Vector64_op_Equality: #elif defined(TARGET_XARCH) case NI_Vector256_op_Equality: @@ -32045,6 +32231,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_op_Inequality: #if defined(TARGET_ARM64) + case NI_Vector_op_Inequality: case NI_Vector64_op_Inequality: #elif defined(TARGET_XARCH) case NI_Vector256_op_Inequality: @@ -32506,6 +32693,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_op_Equality: #if defined(TARGET_ARM64) + case NI_Vector_op_Equality: case NI_Vector64_op_Equality: #elif defined(TARGET_XARCH) case NI_Vector256_op_Equality: @@ -32527,6 +32715,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector128_op_Inequality: #if defined(TARGET_ARM64) + case NI_Vector_op_Inequality: case NI_Vector64_op_Inequality: #elif defined(TARGET_XARCH) case NI_Vector256_op_Inequality: diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 862fdc8f45912a..219dd0aef3f19c 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1232,6 +1232,12 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI { return simdSize; } +#if defined(TARGET_ARM64) + else if ((FIRST_NI_Vector <= id) && (id <= LAST_NI_Vector)) + { + return Compiler::compVectorTLength; + } +#endif CORINFO_CLASS_HANDLE typeHnd = nullptr; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp 
b/src/coreclr/jit/hwintrinsicarm64.cpp index 73a243d0bbf07b..375f531b06568c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1366,6 +1366,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Equals: case NI_Vector64_Equals: case NI_Vector128_Equals: { @@ -1378,6 +1379,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_Equality: case NI_Vector64_op_Equality: case NI_Vector128_op_Equality: { @@ -1390,6 +1392,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_EqualsAny: case NI_Vector64_EqualsAny: case NI_Vector128_EqualsAny: { @@ -1668,6 +1671,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThan: case NI_Vector64_GreaterThan: case NI_Vector128_GreaterThan: { @@ -1680,6 +1684,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThanAll: case NI_Vector64_GreaterThanAll: case NI_Vector128_GreaterThanAll: { @@ -1692,6 +1697,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThanAny: case NI_Vector64_GreaterThanAny: case NI_Vector128_GreaterThanAny: { @@ -1704,6 +1710,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThanOrEqual: case NI_Vector64_GreaterThanOrEqual: case NI_Vector128_GreaterThanOrEqual: { @@ -1716,6 +1723,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThanOrEqualAll: case NI_Vector64_GreaterThanOrEqualAll: case NI_Vector128_GreaterThanOrEqualAll: { @@ -1728,6 +1736,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GreaterThanOrEqualAny: case NI_Vector64_GreaterThanOrEqualAny: case NI_Vector128_GreaterThanOrEqualAny: { @@ -1873,6 +1882,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThan: case NI_Vector64_LessThan: case NI_Vector128_LessThan: { @@ -1885,6 +1895,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThanAll: case NI_Vector64_LessThanAll: case NI_Vector128_LessThanAll: { @@ -1897,6 +1908,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThanAny: case NI_Vector64_LessThanAny: case NI_Vector128_LessThanAny: { @@ -1909,6 +1921,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThanOrEqual: case NI_Vector64_LessThanOrEqual: case NI_Vector128_LessThanOrEqual: { @@ -1921,6 +1934,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThanOrEqualAll: case NI_Vector64_LessThanOrEqualAll: case NI_Vector128_LessThanOrEqualAll: { @@ -1933,6 +1947,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LessThanOrEqualAny: case NI_Vector64_LessThanOrEqualAny: case NI_Vector128_LessThanOrEqualAny: { @@ -2171,6 +2186,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_Inequality: case NI_Vector64_op_Inequality: case NI_Vector128_op_Inequality: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 979f3fc3e65d72..9c1ad341872b2f 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ 
b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -49,16 +49,33 @@ HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +HARDWARE_INTRINSIC(Vector, GreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, GreaterThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +HARDWARE_INTRINSIC(Vector, LessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, LessThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, 
LessThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) - -//HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -//HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_Vector NI_Vector_op_BitwiseOr +HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +#define LAST_NI_Vector NI_Vector_op_Inequality // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 611ceb09339233..0950ca70d157bd 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -436,6 +436,9 @@ class Lowering final : public Phase #ifdef FEATURE_HW_INTRINSICS GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); +#if defined(TARGET_ARM64) + GenTree* LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps cmpOp); +#endif GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index e90750f937e9fa..5245049da842c9 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1757,6 +1757,16 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Vector_op_Equality: + { + return LowerHWIntrinsicCmpOpVL(node, GT_EQ); + } + + case NI_Vector_op_Inequality: + { + return LowerHWIntrinsicCmpOpVL(node, GT_NE); + } + case NI_Vector64_op_Equality: case NI_Vector128_op_Equality: { @@ -2019,8 +2029,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { assert((node->GetHWIntrinsicId() == NI_Vector64_Create) || (node->GetHWIntrinsicId() == NI_Vector128_Create) || - (node->GetHWIntrinsicId() == NI_Vector_Create) || - (node->GetHWIntrinsicId() == NI_Vector64_CreateScalar) || + (node->GetHWIntrinsicId() == NI_Vector_Create) || (node->GetHWIntrinsicId() == NI_Vector64_CreateScalar) || (node->GetHWIntrinsicId() == NI_Vector128_CreateScalar) || (node->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe) || (node->GetHWIntrinsicId() == NI_Vector128_CreateScalarUnsafe) || @@ -2047,6 +2056,171 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) return false; } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOpVL: Lowers a Vector comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. 
+//    cmpOp - The comparison operation; currently this must be GT_EQ or GT_NE
+//
+// Return Value:
+//    The next node to lower.
+//
+GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
+{
+    NamedIntrinsic intrinsicId     = node->GetHWIntrinsicId();
+    CorInfoType    simdBaseJitType = node->GetSimdBaseJitType();
+    var_types      simdBaseType    = node->GetSimdBaseType();
+    unsigned       simdSize        = node->GetSimdSize();
+    var_types      simdType        = Compiler::getSIMDTypeForSize(simdSize);
+    assert(simdType == TYP_SIMD32);
+
+    assert((intrinsicId == NI_Vector_op_Equality) || (intrinsicId == NI_Vector_op_Inequality));
+
+    assert(varTypeIsSIMD(simdType));
+    assert(varTypeIsArithmetic(simdBaseType));
+    assert(simdSize != 0);
+    assert(node->TypeIs(TYP_INT));
+    assert((cmpOp == GT_EQ) || (cmpOp == GT_NE));
+
+    // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality):
+    //          /--* op2  mask
+    //          /--* op1  mask
+    //   node = *  HWINTRINSIC   simd   T op_Equality
+
+    GenTree* op1 = node->Op(1);
+    GenTree* op2 = node->Op(2);
+
+    // Optimize comparison against Vector.Zero via CNTP:
+    //
+    //   bool eq = v == Vector.Zero
+    //
+    // to:
+    //
+    //   bool eq = Sve.GetActiveElementCount(v) == 0;
+    //
+
+    GenTree* op     = nullptr;
+    GenTree* opZero = nullptr;
+    if (op1->IsMaskZero())
+    {
+        op     = op2;
+        opZero = op1;
+    }
+    else if (op2->IsMaskZero())
+    {
+        op     = op1;
+        opZero = op2;
+    }
+
+    // Currently only `some == Vector.Zero` is handled
+    if (op != nullptr)
+    {
+        NamedIntrinsic elementCountIntrinsicId;
+        int            elementsCnt = 0;
+        switch (simdBaseType)
+        {
+            case TYP_BYTE:
+            case TYP_UBYTE:
+                elementCountIntrinsicId = NI_Sve_Count8BitElements;
+                elementsCnt             = simdSize;
+                break;
+            case TYP_SHORT:
+            case TYP_USHORT:
+                elementCountIntrinsicId = NI_Sve_Count16BitElements;
+                elementsCnt             = simdSize / 2;
+                break;
+            case TYP_INT:
+            case TYP_UINT:
+            case TYP_FLOAT:
+                elementCountIntrinsicId = NI_Sve_Count32BitElements;
+                elementsCnt             = simdSize / 4;
+                break;
+            case TYP_LONG:
+            case TYP_ULONG:
+            case TYP_DOUBLE:
+                elementCountIntrinsicId = NI_Sve_Count64BitElements;
+                elementsCnt             = simdSize / 8;
+                break;
+            default:
+                unreached();
+        }
+
+        GenTree* cntNode;
+
+        if (cmpOp == GT_EQ)
+        {
+            if (comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
+            {
+                GenTree* svePattern = comp->gtNewIconNode(31, TYP_LONG);
+                BlockRange().InsertBefore(node, svePattern);
+
+                cntNode = comp->gtNewSimdHWIntrinsicNode(TYP_LONG, svePattern, elementCountIntrinsicId,
+                                                         CORINFO_TYPE_LONG, simdSize);
+            }
+            else
+            {
+                cntNode = comp->gtNewIconNode(elementsCnt, TYP_LONG);
+            }
+        }
+        else
+        {
+            // For inequality, we just need to check that all lanes are 0
+            cntNode = comp->gtNewIconNode(0, TYP_LONG);
+        }
+
+        BlockRange().InsertBefore(node, cntNode);
+        BlockRange().Remove(opZero);
+
+        LowerNode(cntNode);
+
+        node->ChangeOper(cmpOp);
+        node->gtType        = TYP_INT;
+        node->AsOp()->gtOp1 = op; // use the non-zero operand; opZero was removed above
+        node->AsOp()->gtOp2 = cntNode;
+
+        LowerNodeCC(node, (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE);
+
+        node->gtType = TYP_VOID;
+        node->ClearUnusedValue();
+        LowerNode(node);
+        return node->gtNext;
+    }
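The fast path above leans on a simple identity: a predicate equals the all-false mask exactly when its active-lane count is zero, and differs from it exactly when the count is non-zero. A scalar model of that identity (a sketch; the emitted code uses CNTP, not a loop):

#include <cstddef>

// mask == AllFalse  <=>  active-lane count == 0
// mask != AllFalse  <=>  active-lane count != 0
bool MaskIsAllFalse(const bool* mask, size_t laneCount)
{
    size_t active = 0;
    for (size_t i = 0; i < laneCount; i++)
    {
        if (mask[i])
        {
            active++;
        }
    }
    return active == 0;
}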
+
+    GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, NI_Sve_CompareEqual, simdBaseJitType, simdSize);
+    BlockRange().InsertBefore(node, cmp);
+
+    // Save cmp into a temp as we're going to need to pass it to GetActiveElementCount
+    node->Op(1) = cmp;
+    LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
+    ReplaceWithLclVar(tmp1Use);
+    GenTree* cmpResult = node->Op(1);
+    LowerNode(cmpResult);
+
+    GenTree* allTrue       = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+    GenTree* activeElemCnt = comp->gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                                            simdBaseJitType, simdSize);
+    GenTree* cntNode = comp->gtNewIconNode(0, TYP_LONG);
+    BlockRange().InsertBefore(node, allTrue);
+    BlockRange().InsertBefore(node, activeElemCnt);
+    BlockRange().InsertBefore(node, cntNode);
+
+    LowerNode(activeElemCnt);
+    LowerNode(cntNode);
+
+    LowerNode(cmp);
+
+    node->ChangeOper(cmpOp);
+    node->gtType        = TYP_INT;
+    node->AsOp()->gtOp1 = activeElemCnt;
+    node->AsOp()->gtOp2 = cntNode;
+
+    LowerNodeCC(node, (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE);
+
+    node->gtType = TYP_VOID;
+    node->ClearUnusedValue();
+    LowerNode(node);
+    return node->gtNext;
+}
+
 //----------------------------------------------------------------------------------------------
 // Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector64 or Vector128 comparison intrinsic
 //

diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 895b3c8c571621..849f4770b72640 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -8737,6 +8737,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
             case NI_Vector128_op_Equality:
 #if defined(TARGET_ARM64)
+            case NI_Vector_op_Equality:
             case NI_Vector64_op_Equality:
 #elif defined(TARGET_XARCH)
             case NI_Vector256_op_Equality:
@@ -8758,6 +8759,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
             case NI_Vector128_op_Inequality:
 #if defined(TARGET_ARM64)
+            case NI_Vector_op_Inequality:
             case NI_Vector64_op_Inequality:
 #elif defined(TARGET_XARCH)
             case NI_Vector256_op_Inequality:
@@ -8863,6 +8865,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
         {
             case NI_Vector128_op_Equality:
 #if defined(TARGET_ARM64)
+            case NI_Vector_op_Equality:
             case NI_Vector64_op_Equality:
 #elif defined(TARGET_XARCH)
             case NI_Vector256_op_Equality:
@@ -8880,6 +8883,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
             case NI_Vector128_op_Inequality:
 #if defined(TARGET_ARM64)
+            case NI_Vector_op_Inequality:
             case NI_Vector64_op_Inequality:
 #elif defined(TARGET_XARCH)
             case NI_Vector256_op_Inequality:

From 15f03849d2a25ec6224e58a67119e920400062cd Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 28 Mar 2025 16:27:12 -0700
Subject: [PATCH 030/120] Support Vector.Max/MaxNative

---
 src/coreclr/jit/gentree.cpp               | 12 ++++++++++--
 src/coreclr/jit/hwintrinsicarm64.cpp      |  2 ++
 src/coreclr/jit/hwintrinsiclistarm64sve.h |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index a777d4b01defaf..2c3af6f3eae7c6 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -22765,7 +22765,8 @@ GenTree* Compiler::gtNewSimdCndSelNode(
     }
     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
 #elif defined(TARGET_ARM64)
-    return gtNewSimdHWIntrinsicNode(type, op1, op2, op3,
NI_AdvSimd_BitwiseSelect, simdBaseJitType, simdSize); + intrinsic = (simdSize > 16) ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24748,6 +24749,9 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( if (intrinsic != NI_Illegal) { +#ifdef TARGET_ARM64 + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); +#endif return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -24755,7 +24759,7 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( GenTree* op2Dup = fgMakeMultiUse(&op2); // op1 = op1 > op2 - op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize); + op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); // result = ConditionalSelect(op1, op1Dup, op2Dup) return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize); @@ -28984,6 +28988,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_Vector128_op_Inequality: sveId = NI_Vector_op_Inequality; break; + case NI_AdvSimd_Max: + case NI_AdvSimd_Arm64_Max: + sveId = NI_Sve_Max; + break; default: sveId = id; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 375f531b06568c..16eb1e33c0e5d2 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2043,6 +2043,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Max: case NI_Vector64_Max: case NI_Vector128_Max: { @@ -2055,6 +2056,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_MaxNative: case NI_Vector64_MaxNative: case NI_Vector128_MaxNative: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9c1ad341872b2f..279ab707a18b7d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -66,6 +66,8 @@ HARDWARE_INTRINSIC(Vector, LessThanAny, HARDWARE_INTRINSIC(Vector, LessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 84d7bf338884f3677292211bb37dcc64c110ff6c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 16:34:03 -0700 Subject: [PATCH 031/120] Support Vector.Min/MinNative --- src/coreclr/jit/gentree.cpp | 17 ++++++++++++----- src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 3 ++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2c3af6f3eae7c6..95b86a43ff4ae2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25013,6 +25013,9 @@ GenTree* Compiler::gtNewSimdMinNativeNode( if (intrinsic != NI_Illegal) { +#ifdef TARGET_ARM64 + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); +#endif return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -25020,7 +25023,7 @@ GenTree* Compiler::gtNewSimdMinNativeNode( GenTree* op2Dup = fgMakeMultiUse(&op2); // op1 = op1 < op2 - op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize); + op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); // result = ConditionalSelect(op1, op1Dup, op2Dup) return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize); @@ -28976,6 +28979,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero: sveId = NI_Sve_ConvertToUInt64; break; + case NI_AdvSimd_Max: + case NI_AdvSimd_Arm64_Max: + sveId = NI_Sve_Max; + break; + case NI_AdvSimd_Min: + case NI_AdvSimd_Arm64_Min: + sveId = NI_Sve_Min; + break; case NI_AdvSimd_Not: sveId = NI_Sve_Not; break; @@ -28988,10 +28999,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_Vector128_op_Inequality: sveId = NI_Vector_op_Inequality; break; - case NI_AdvSimd_Max: - case NI_AdvSimd_Arm64_Max: - sveId = NI_Sve_Max; - break; default: sveId = id; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 16eb1e33c0e5d2..b83ba8036afd0e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2074,6 +2074,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Min: case NI_Vector64_Min: case NI_Vector128_Min: { @@ -2086,6 +2087,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_MinNative: case NI_Vector64_MinNative: case NI_Vector128_MinNative: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 279ab707a18b7d..dd334a6774c062 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -68,7 +68,8 @@ HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - +HARDWARE_INTRINSIC(Vector, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 2dff8b8ed6bbd062b83d20d94313799c80714ef7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 16:48:54 -0700 Subject: [PATCH 032/120] Support Vector.MinNumber/MaxNumber --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index b83ba8036afd0e..b0c789598b9251 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2044,6 +2044,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Max: + //case NI_Vector_MaxNumber: case NI_Vector64_Max: case NI_Vector128_Max: { @@ -2075,6 +2076,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Min: + //case NI_Vector_MinNumber: case NI_Vector64_Min: case NI_Vector128_Min: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index dd334a6774c062..3e39790768fbed 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -68,8 +68,12 @@ HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MaxNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) + 
 HARDWARE_INTRINSIC(Vector, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, MinNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+
 //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
 //HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 //HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
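For reference, the MaxNumber/MinNumber entries added above correspond to the IEEE "number" semantics implemented by the SVE FMAXNM/FMINNM instructions: when exactly one operand is NaN, the other operand wins. A scalar sketch of that behavior (signed-zero ordering details omitted):

#include <cmath>

double MaxNumberModel(double a, double b)
{
    if (std::isnan(a)) return b; // a NaN loses to a number
    if (std::isnan(b)) return a;
    return (a > b) ? a : b;
}

double MinNumberModel(double a, double b)
{
    if (std::isnan(a)) return b;
    if (std::isnan(b)) return a;
    return (a < b) ? a : b;
}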
From 58c872cdca99397b7f9dd7a2a3c118269ae4a585 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 28 Mar 2025 17:11:57 -0700
Subject: [PATCH 033/120] Support Vector.IsPositive/IsNegative/IsPositiveInfinity

---
 src/coreclr/jit/hwintrinsicarm64.cpp      | 3 +++
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index b0c789598b9251..70d09fafe3a433 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1802,6 +1802,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_IsNegative:
         case NI_Vector64_IsNegative:
         case NI_Vector128_IsNegative:
         {
@@ -1846,6 +1847,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_IsPositive:
         case NI_Vector64_IsPositive:
         case NI_Vector128_IsPositive:
         {
@@ -1855,6 +1857,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_IsPositiveInfinity:
         case NI_Vector64_IsPositiveInfinity:
         case NI_Vector128_IsPositiveInfinity:
         {

diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 3e39790768fbed..a2562465013854 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -60,6 +60,11 @@ HARDWARE_INTRINSIC(Vector, GreaterThanOrEqual,
 HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector, IsNegative, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, IsPositive, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, IsPositiveInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+
+
 HARDWARE_INTRINSIC(Vector, LessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, LessThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, LessThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

From d6d197d9ec4292e401a7a0ab0fa16eed73d73cdb Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 28 Mar 2025 18:00:26 -0700
Subject: [PATCH 034/120] Support Vector.get_Zero/One/AllBitsSet

---
 src/coreclr/jit/codegenarm64.cpp          | 3 +--
 src/coreclr/jit/fgbasic.cpp               | 3 +++
 src/coreclr/jit/hwintrinsicarm64.cpp      | 3 +++
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 5 +++--
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 6240267f5c1b43..c14158bc66dcf2 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2507,8 +2507,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
                 if (vecCon->IsAllBitsSet())
                 {
                     // Use Scalable_B because for Ones, it doesn't matter.
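Context for the change just below: the SVE byte-lane move takes a signed 8-bit immediate, so all-bits-set has to be written as -1; 0xFF is out of range for the signed form, which is why the old code routed through the bitmask-immediate variant. A one-line sanity check that -1 produces the intended bit pattern:

#include <cstdint>

// -1 as a signed byte has the bit pattern 0xFF, so "mov z0.b, #-1"
// sets every bit of every lane.
static_assert(static_cast<uint8_t>(static_cast<int8_t>(-1)) == 0xFF,
              "byte -1 is all-bits-set");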
- emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, 0xFF, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_IMM_BITMASK); + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, -1, INS_OPTS_SCALABLE_B); } else if (vecCon->IsZero()) { diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index de56c5a1f7dc14..0df475600a403d 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1529,6 +1529,9 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed #if defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_ARM64) + case NI_Vector_get_AllBitsSet: + case NI_Vector_get_One: + case NI_Vector_get_Zero: case NI_Vector64_get_AllBitsSet: case NI_Vector64_get_One: case NI_Vector64_get_Zero: diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 70d09fafe3a433..90c833d480099e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1608,6 +1608,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_get_AllBitsSet: case NI_Vector64_get_AllBitsSet: case NI_Vector128_get_AllBitsSet: { @@ -1624,6 +1625,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_get_One: case NI_Vector64_get_One: case NI_Vector128_get_One: { @@ -1632,6 +1634,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_get_Zero: case NI_Vector64_get_Zero: case NI_Vector128_get_Zero: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index a2562465013854..ac05d78266cc80 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -80,8 +80,9 @@ HARDWARE_INTRINSIC(Vector, MinNative, HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -//HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -//HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) From ad4757837de7333475b9e12a8e9dff670080ee9e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 18:44:27 -0700 Subject: [PATCH 035/120] Support Vector.get_Indices/Sve.Index --- src/coreclr/jit/codegenarm64.cpp | 4 ++-- src/coreclr/jit/hwintrinsicarm64.cpp | 7 +++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 17 +++++++++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 ++ src/coreclr/jit/lowerarmarch.cpp | 13 +++++++++++++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c14158bc66dcf2..975f771f71b118 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2539,7 +2539,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre simd32_t constValue; memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd32_t)); hnd = emit->emitSimd32Const(constValue); - emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); } } else @@ -2569,7 +2569,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre simd64_t constValue; memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); hnd = emit->emitSimd64Const(constValue); - emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0); + emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); } } } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 90c833d480099e..c40bba363d1a25 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1617,6 +1617,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_get_Indices: + { + GenTree* start = gtNewIconNode(0, TYP_INT); + GenTree* step = gtNewIconNode(1, TYP_INT); + retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); + break; + } case NI_Vector64_get_Indices: case NI_Vector128_get_Indices: { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 4b5713b628f587..c9d482c0745bb5 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2649,6 +2649,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_Index: + { + // either both should be available or both not. 
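For reference, a scalar model of what Sve.Index produces, plus the immediate-range constraint that the containment logic in the lowering hunk further below checks (the helper name here is illustrative). Vector.get_Indices above is simply Index(0, 1):

#include <cstddef>
#include <cstdint>

// Scalar model of SVE INDEX: lane i receives start + i * step.
void IndexModel(int64_t* lanes, size_t laneCount, int64_t start, int64_t step)
{
    for (size_t i = 0; i < laneCount; i++)
    {
        lanes[i] = start + static_cast<int64_t>(i) * step;
    }
}

// The immediate form of INDEX encodes only -16 <= start <= 15 and
// -16 <= step <= 15; anything wider must come from registers.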
+ assert ((op1Reg == REG_NA) == (op2Reg == REG_NA)); + + if (op1Reg == REG_NA) + { + int start = (int)intrin.op1->AsIntCon()->gtIconVal; + int step = (int)intrin.op2->AsIntCon()->gtIconVal; + GetEmitter()->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_sve_index, emitTypeSize(intrin.baseType), targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } + break; + } default: unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index ac05d78266cc80..a0df51a1c3c953 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -81,6 +81,7 @@ HARDWARE_INTRINSIC(Vector, MinNumber, //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, get_Indices, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_Zero, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -222,6 +223,7 @@ HARDWARE_INTRINSIC(Sve, GetFfrSByte, HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(Sve, Index, -1, 2, {INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, {INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, 
INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 5245049da842c9..d25e8544760603 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1952,6 +1952,19 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) StoreFFRValue(node); break; } + case NI_Sve_Index: + { + int start = (int)node->Op(1)->AsIntCon()->IconValue(); + int step = (int)node->Op(2)->AsIntCon()->IconValue(); + bool encodableStart = ((-16 <= start) && (start <= 15)); + bool encodableStep = ((-16 <= step) && (step <= 15)); + if (encodableStart && encodableStep) + { + node->Op(1)->SetContained(); + node->Op(2)->SetContained(); + } + break; + } default: break; From fafee9abdb4f4e4b94ed0beacb4660b351e2a22c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 18:52:57 -0700 Subject: [PATCH 036/120] Support Vector.Multiply --- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 7 +++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 95b86a43ff4ae2..20d51960417035 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28987,6 +28987,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_Min: sveId = NI_Sve_Min; break; + case NI_AdvSimd_Multiply: + case NI_AdvSimd_Arm64_Multiply: + sveId = NI_Sve_Multiply; + break; case NI_AdvSimd_Not: sveId = NI_Sve_Not; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c40bba363d1a25..1b69dcb3013bd9 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2120,6 +2120,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_Multiply: case NI_Vector64_op_Multiply: case NI_Vector128_op_Multiply: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index a0df51a1c3c953..1ca3752b6c87e1 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -78,6 +78,8 @@ HARDWARE_INTRINSIC(Vector, MaxNumber, HARDWARE_INTRINSIC(Vector, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid,
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) + //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -89,7 +91,8 @@ HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_Vector NI_Vector_op_Inequality +HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +#define LAST_NI_Vector NI_Vector_op_Multiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -297,7 +300,7 @@ HARDWARE_INTRINSIC(Sve, Min, HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, {INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, {INS_sve_mul, INS_sve_mul, INS_sve_mul, 
INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) From b47583423321053bdc71ca2bb7ebc51cbb389fc4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 19:10:54 -0700 Subject: [PATCH 037/120] Support Vector.Subtract --- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 20d51960417035..314a586310577e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28997,6 +28997,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Or: sveId = NI_Sve_Or; break; + case NI_AdvSimd_Subtract: + case NI_AdvSimd_Arm64_Subtract: + sveId = NI_Sve_Subtract; + break; case NI_Vector128_op_Equality: sveId = NI_Vector_op_Equality; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 1b69dcb3013bd9..b39cfcf3062079 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2227,6 +2227,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_Subtraction: case NI_Vector64_op_Subtraction: case NI_Vector128_op_Subtraction: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 1ca3752b6c87e1..96d04cc3d21b8e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -80,6 +80,7 @@ HARDWARE_INTRINSIC(Vector, MinNative, HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -92,7 +93,8 @@ HARDWARE_INTRINSIC(Vector, op_BitwiseOr, HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector NI_Vector_op_Multiply +HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +#define LAST_NI_Vector NI_Vector_op_Subtraction // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags From 37a78d71e0ff469198b23c3e35c20d3e3baa980d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 19:43:42 -0700 Subject: [PATCH 038/120] Support Vector.Divide --- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 6 ++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 314a586310577e..ca4e28ca06b3d5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28355,6 +28355,7 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) // genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar) { + //TODO-VL: Update this method with SVE_ intrinsics as well *isScalar = false; switch (id) @@ -28979,6 +28980,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_ConvertToUInt64RoundToZero: sveId = NI_Sve_ConvertToUInt64; break; + case NI_AdvSimd_Arm64_Divide: + sveId = NI_Sve_Divide; + break; case NI_AdvSimd_Max: case NI_AdvSimd_Arm64_Max: sveId = 
NI_Sve_Max; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index b39cfcf3062079..d0859d6b38e487 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1322,6 +1322,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_Division: case NI_Vector64_op_Division: case NI_Vector128_op_Division: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 96d04cc3d21b8e..888075a6fe7c82 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -49,6 +49,7 @@ HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector, Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -90,6 +91,7 @@ HARDWARE_INTRINSIC(Vector, get_Zero, HARDWARE_INTRINSIC(Vector, op_Addition, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector, op_Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -284,14 +286,14 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt32, HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToUInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialSideEffectMask) -HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) +HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendFirstFaulting, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldff1w, INS_sve_ldff1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialSideEffectMask) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, Max, -1, -1, {INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_smax, INS_sve_umax, INS_sve_fmax, INS_sve_fmax}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) From e9eeca6d3f35ad12bd0cfdbe3188fcd1a2ffef7b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 Mar 2025 19:48:24 -0700 Subject: [PATCH 039/120] Support Vector.op_Xor --- src/coreclr/jit/gentree.cpp | 3 +++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ca4e28ca06b3d5..4c2139c1d163b1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -29011,6 +29011,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_Vector128_op_Inequality: sveId = NI_Vector_op_Inequality; break; + case NI_AdvSimd_Xor: + sveId = NI_Sve_Xor; + break; default: sveId = id; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index d0859d6b38e487..1c8dedfdb4f492 100644 --- 
a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2758,6 +2758,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_ExclusiveOr: case NI_Vector64_op_ExclusiveOr: case NI_Vector128_op_ExclusiveOr: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 888075a6fe7c82..9812b31ec12956 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -82,6 +82,7 @@ HARDWARE_INTRINSIC(Vector, MinNumber, HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -93,6 +94,7 @@ HARDWARE_INTRINSIC(Vector, op_BitwiseAnd, HARDWARE_INTRINSIC(Vector, op_BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, op_Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, op_ExclusiveOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
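Patches 036 through 039 all repeat one recipe: the Vector operator gets a table entry, an import case in impSpecialIntrinsic, and a mapping in GenTreeHWIntrinsic::GetScalableHWIntrinsicId that swaps the fixed-width AdvSimd id for its SVE counterpart once the SIMD size exceeds the 16-byte NEON register. The standalone C++ sketch below models only the shape of that dispatch; the enum values and the 16-byte threshold are illustrative stand-ins, not the JIT's real definitions (those are generated into corinfoinstructionset.h and the hwintrinsiclist headers).

    #include <cstdio>

    // Illustrative stand-ins for the JIT's NamedIntrinsic ids.
    enum NamedIntrinsic
    {
        NI_AdvSimd_Multiply,
        NI_AdvSimd_Subtract,
        NI_AdvSimd_Xor,
        NI_Sve_Multiply,
        NI_Sve_Subtract,
        NI_Sve_Xor,
    };

    // Models GetScalableHWIntrinsicId: ids pass through unchanged while the
    // vector fits a 16-byte NEON register, and are remapped to their SVE
    // equivalents for anything wider.
    static NamedIntrinsic GetScalableId(unsigned simdSize, NamedIntrinsic id)
    {
        if (simdSize <= 16)
        {
            return id;
        }
        switch (id)
        {
            case NI_AdvSimd_Multiply: return NI_Sve_Multiply;
            case NI_AdvSimd_Subtract: return NI_Sve_Subtract;
            case NI_AdvSimd_Xor:      return NI_Sve_Xor;
            default:                  return id;
        }
    }

    int main()
    {
        // A 32-byte Vector<T> (256-bit VL) takes the SVE id; 16 bytes stays on NEON.
        printf("%d\n", GetScalableId(32, NI_AdvSimd_Multiply)); // NI_Sve_Multiply
        printf("%d\n", GetScalableId(16, NI_AdvSimd_Multiply)); // unchanged
        return 0;
    }

The same switch is what later commits extend for Divide, Floor, Ceiling, and Round, which is why each of those patches touches gentree.cpp in addition to the importer and the intrinsic table.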
From 8e909591bda294db7e4e68c6273df3ea51f10f84 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 09:58:54 -0700 Subject: [PATCH 040/120] Support Vector.op_OnesComplement/op_UnaryNegation/op_UnaryPlus --- src/coreclr/jit/fgbasic.cpp | 3 ++- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 3 +++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 ++++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 0df475600a403d..a8fb09b513b8c7 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1466,7 +1466,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_Vector64_AsUInt32: case NI_Vector64_AsUInt64: case NI_Vector64_op_UnaryPlus: -#endif // TARGET_XARCH + case NI_Vector_op_UnaryPlus: +#endif // TARGET_ARM64 case NI_Vector128_As: case NI_Vector128_AsByte: case NI_Vector128_AsDouble: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4c2139c1d163b1..6062068d3a0ca8 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -28926,6 +28926,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty // NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) { + //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places NamedIntrinsic sveId = id; if (simdSize > 16) { @@ -28995,6 +28996,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_Multiply: sveId = NI_Sve_Multiply; break; + case NI_AdvSimd_Negate: + sveId = NI_Sve_Negate; + break; case NI_AdvSimd_Not: sveId = NI_Sve_Not; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 1c8dedfdb4f492..a89c094fcb43cf 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2189,6 +2189,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_UnaryNegation: case NI_Vector64_op_UnaryNegation: case NI_Vector128_op_UnaryNegation: { @@ -2198,6 +2199,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_OnesComplement: case NI_Vector64_op_OnesComplement: case NI_Vector128_op_OnesComplement: { @@ -2220,6 +2222,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_UnaryPlus: case NI_Vector64_op_UnaryPlus: case NI_Vector128_op_UnaryPlus: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9812b31ec12956..c889c27e0ea82d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -80,7 +80,8 @@ HARDWARE_INTRINSIC(Vector, Min, HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) - +HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -97,8 +98,11 @@ HARDWARE_INTRINSIC(Vector, op_Equality, HARDWARE_INTRINSIC(Vector, op_ExclusiveOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -#define LAST_NI_Vector NI_Vector_op_Subtraction +HARDWARE_INTRINSIC(Vector, op_UnaryNegation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_UnaryPlus, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +#define LAST_NI_Vector NI_Vector_op_UnaryPlus // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags From e00d016e99840ff76593be5e71a825c32d4f7a51 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 10:18:22 -0700 Subject: [PATCH 041/120] Support Vector.MultiplyAddEstimate --- src/coreclr/jit/gentree.cpp | 8 ++++++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 3 files changed, 10 
insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6062068d3a0ca8..f928e873a6e1cf 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -23504,6 +23504,10 @@ GenTree* Compiler::gtNewSimdFmaNode( #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 +#if defined(TARGET_ARM64) + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); +#endif + assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); } @@ -28984,6 +28988,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_Divide: sveId = NI_Sve_Divide; break; + case NI_AdvSimd_FusedMultiplyAdd: + case NI_AdvSimd_Arm64_FusedMultiplyAdd: + sveId = NI_Sve_FusedMultiplyAdd; + break; case NI_AdvSimd_Max: case NI_AdvSimd_Arm64_Max: sveId = NI_Sve_Max; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a89c094fcb43cf..fe94273c5491a6 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2142,6 +2142,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_MultiplyAddEstimate: case NI_Vector64_MultiplyAddEstimate: case NI_Vector128_MultiplyAddEstimate: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index c889c27e0ea82d..7217023f00a99c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -80,6 +80,7 @@ HARDWARE_INTRINSIC(Vector, Min, HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From f14f792595853fd801aa7c2e6f1e4fb82ca348c2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 10:57:50 -0700 Subject: [PATCH 042/120] Support Vector.IsZero/IsNaN --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index fe94273c5491a6..01424e02dd69b1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1804,6 +1804,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_IsNaN: case NI_Vector64_IsNaN: case NI_Vector128_IsNaN: { @@ -1887,6 +1888,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_IsZero: case NI_Vector64_IsZero: case NI_Vector128_IsZero: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7217023f00a99c..8a622cb7a45f2c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -60,11 +60,11 @@ HARDWARE_INTRINSIC(Vector, GreaterThanAny, HARDWARE_INTRINSIC(Vector, GreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, GreaterThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) - +HARDWARE_INTRINSIC(Vector, IsNaN, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, IsNegative, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, IsPositive, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, IsPositiveInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - +HARDWARE_INTRINSIC(Vector, IsZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) From e976b4076c5994828411cf28eb55d45b1b934dd3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 11:15:30 -0700 Subject: [PATCH 043/120] Support Vector.Floor --- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git 
a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f928e873a6e1cf..4fd6fb807c00af 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -23445,6 +23445,7 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType { intrinsic = NI_AdvSimd_Floor; } + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -28988,6 +28989,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_Divide: sveId = NI_Sve_Divide; break; + case NI_AdvSimd_Floor: + sveId = NI_Sve_RoundToNegativeInfinity; + break; case NI_AdvSimd_FusedMultiplyAdd: case NI_AdvSimd_Arm64_FusedMultiplyAdd: sveId = NI_Sve_FusedMultiplyAdd; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 01424e02dd69b1..c6dfed90b6b0f7 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1573,6 +1573,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Floor: case NI_Vector64_Floor: case NI_Vector128_Floor: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 8a622cb7a45f2c..7f5c00e4ecf666 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -53,7 +53,7 @@ HARDWARE_INTRINSIC(Vector, Division, HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) - +HARDWARE_INTRINSIC(Vector, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, GreaterThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) From cb68fb952f78616c79fa2a2d7da9b5e92bc54cc6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 11:19:07 -0700 Subject: [PATCH 044/120] Support Vector.FusedMultiplyAdd --- src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp 
b/src/coreclr/jit/hwintrinsicarm64.cpp index c6dfed90b6b0f7..1d9a5d2e9d7d08 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1590,6 +1590,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_FusedMultiplyAdd: case NI_Vector64_FusedMultiplyAdd: case NI_Vector128_FusedMultiplyAdd: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7f5c00e4ecf666..27edd6a4ce1bb3 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -54,6 +54,7 @@ HARDWARE_INTRINSIC(Vector, Equals, HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, FusedMultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, GreaterThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) From fe633ed6b4d4cf427e990f7f7a3db3c98bd24258 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 11:22:54 -0700 Subject: [PATCH 045/120] Support Vector.Ceiling --- src/coreclr/jit/gentree.cpp | 5 +++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4fd6fb807c00af..a9f0b2a36173df 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21560,6 +21560,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s { intrinsic = NI_AdvSimd_Ceiling; } + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -28948,6 +28949,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_And: sveId = NI_Sve_And; break; + case NI_AdvSimd_Ceiling: + case NI_AdvSimd_Arm64_Ceiling: + sveId = NI_Sve_RoundToPositiveInfinity; + break; case NI_AdvSimd_CompareEqual: case NI_AdvSimd_Arm64_CompareEqual: sveId = NI_Sve_CompareEqual; diff --git 
a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 1d9a5d2e9d7d08..bb68c7a454c6ce 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -955,6 +955,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Ceiling: case NI_Vector64_Ceiling: case NI_Vector128_Ceiling: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 27edd6a4ce1bb3..cfb340df9d3c22 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -35,7 +35,7 @@ HARDWARE_INTRINSIC(Vector, AsVectorUInt32, HARDWARE_INTRINSIC(Vector, AsVectorUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) - +HARDWARE_INTRINSIC(Vector, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, ConvertToDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToInt32Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) From 2285a07db7b1424132f61272cbc381bd38880869 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 11:28:22 -0700 Subject: [PATCH 046/120] Support Vector.Round --- src/coreclr/jit/gentree.cpp | 5 +++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 3 files changed, 7 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a9f0b2a36173df..e68a7bde868fa2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25601,6 +25601,7 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType { intrinsic = NI_AdvSimd_RoundToNearest; } + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29022,6 +29023,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Or: sveId = NI_Sve_Or; break; + case NI_AdvSimd_RoundToNearest: + case NI_AdvSimd_Arm64_RoundToNearest: + sveId = NI_Sve_RoundToNearest; + break; case NI_AdvSimd_Subtract: case 
NI_AdvSimd_Arm64_Subtract: sveId = NI_Sve_Subtract; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index bb68c7a454c6ce..52b0862d2bb3f2 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2287,6 +2287,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Round: case NI_Vector64_Round: case NI_Vector128_Round: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index cfb340df9d3c22..8e3c78ebf48c43 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -84,6 +84,7 @@ HARDWARE_INTRINSIC(Vector, Multiply, HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 9bdb3b9e6710817d8e1787f4dd06af9d5fb77990 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 13:50:12 -0700 Subject: [PATCH 047/120] Support Vector.LoadVector* --- src/coreclr/jit/hwintrinsicarm64.cpp | 3 +++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 52b0862d2bb3f2..14bda6de0bf06b 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1981,6 +1981,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_AdvSimd_LoadVector64: case NI_AdvSimd_LoadVector128: + case NI_Vector_LoadUnsafe: case NI_Vector64_LoadUnsafe: case NI_Vector128_LoadUnsafe: { @@ -2012,6 +2013,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LoadAligned: case NI_Vector64_LoadAligned: case NI_Vector128_LoadAligned: { @@ -2037,6 +2039,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_LoadAlignedNonTemporal: case NI_Vector64_LoadAlignedNonTemporal: case NI_Vector128_LoadAlignedNonTemporal: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 8e3c78ebf48c43..29f0b2e3136955 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -73,6 +73,11 @@ HARDWARE_INTRINSIC(Vector, LessThanAny, 
HARDWARE_INTRINSIC(Vector, LessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) + +HARDWARE_INTRINSIC(Vector, LoadAligned, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, LoadAlignedNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, LoadUnsafe, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) + HARDWARE_INTRINSIC(Vector, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -85,6 +90,7 @@ HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 5c6392c110afd55a2597cf4e4735648ae6c278d3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak 
Date: Mon, 31 Mar 2025 13:59:18 -0700 Subject: [PATCH 048/120] Support Vector.Store* --- src/coreclr/jit/hwintrinsicarm64.cpp | 3 +++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 14bda6de0bf06b..2584b1363e9e98 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2450,6 +2450,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_StoreUnsafe: case NI_Vector64_StoreUnsafe: case NI_Vector128_StoreUnsafe: { @@ -2491,6 +2492,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_StoreAligned: case NI_Vector64_StoreAligned: case NI_Vector128_StoreAligned: { @@ -2521,6 +2523,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_StoreAlignedNonTemporal: case NI_Vector64_StoreAlignedNonTemporal: case NI_Vector128_StoreAlignedNonTemporal: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 29f0b2e3136955..67d0511e05ec25 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -91,6 +91,9 @@ HARDWARE_INTRINSIC(Vector, Negate, HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From bf9991c20b4bdb8234029d3eed2eaa81c6842cbc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 15:10:53 -0700 Subject: [PATCH 049/120] Support Vector.WidenLower/WidenUpper --- src/coreclr/jit/gentree.cpp | 25 ++++++++++++++++++--- src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 3 +++ 4 files changed, 28 insertions(+), 3 deletions(-) 
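The gentree.cpp hunks below route the widening intrinsic ids through GenTreeHWIntrinsic::GetScalableHWIntrinsicId, so the shared gtNewSimd* helpers can serve both the fixed-width Vector64/Vector128 paths and the scalable Vector<T> path. A minimal, self-contained C++ sketch of the shape that mapping presumably takes (the enum values here are illustrative stand-ins, not the JIT's real NamedIntrinsic table):

    #include <cassert>

    // Stand-in for the JIT's NamedIntrinsic enum; only illustrative values.
    enum NamedIntrinsic
    {
        NI_Illegal,
        NI_AdvSimd_Subtract,
        NI_Sve_Subtract,
    };

    // Fixed-width sizes (<= 16 bytes) keep the AdvSimd id; scalable sizes map
    // to the SVE equivalent; ids with no SVE mapping yet fall back to
    // NI_Illegal so that callers can assert on it.
    static NamedIntrinsic GetScalableId(unsigned simdSize, NamedIntrinsic id)
    {
        if (simdSize <= 16)
        {
            return id;
        }

        switch (id)
        {
            case NI_AdvSimd_Subtract:
                return NI_Sve_Subtract;
            default:
                return NI_Illegal;
        }
    }

    int main()
    {
        assert(GetScalableId(16, NI_AdvSimd_Subtract) == NI_AdvSimd_Subtract);
        assert(GetScalableId(32, NI_AdvSimd_Subtract) == NI_Sve_Subtract);
        return 0;
    }

The Subtract pairing mirrors the NI_AdvSimd_Subtract -> NI_Sve_Subtract case already present in the real helper; everything else above is scaffolding for illustration.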
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index e68a7bde868fa2..86254b36958461 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -26899,7 +26899,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
     }
     else
     {
-        assert(simdSize == 8);
+        assert((simdSize == 8) || (simdSize == compVectorTLength));
         tmp1 = op1;
     }

@@ -26917,8 +26917,10 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
         intrinsic = NI_AdvSimd_ZeroExtendWideningLower;
     }

+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
     assert(intrinsic != NI_Illegal);
-    tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, intrinsic, simdBaseJitType, 8);
+    tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsic, simdBaseJitType, 8);

     if (simdSize == 8)
     {
@@ -27113,7 +27115,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo
         return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize);
     }
 #elif defined(TARGET_ARM64)
-    if (simdSize == 16)
+    if ((simdSize == 16) || (simdSize == compVectorTLength))
     {
         if (varTypeIsFloating(simdBaseType))
         {
@@ -27129,6 +27131,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo
             intrinsic = NI_AdvSimd_ZeroExtendWideningUpper;
         }

+        intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
         assert(intrinsic != NI_Illegal);
         return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
     }
@@ -27156,6 +27159,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo
     tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize);
     return gtNewSimdGetUpperNode(TYP_SIMD8, tmp1, simdBaseJitType, 16);
 }
+
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64
@@ -28977,6 +28981,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
         case NI_AdvSimd_Arm64_ConvertToDouble:
             sveId = NI_Sve_ConvertToDouble;
             break;
+        case NI_AdvSimd_Arm64_ConvertToDoubleUpper:
+            sveId = NI_Sve_ConvertToDoubleUpper;
+            break;
         case NI_AdvSimd_ConvertToSingle:
             sveId = NI_Sve_ConvertToSingle;
             break;
@@ -29027,10 +29034,22 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
         case NI_AdvSimd_Arm64_RoundToNearest:
             sveId = NI_Sve_RoundToNearest;
             break;
+        case NI_AdvSimd_SignExtendWideningLower:
+            sveId = NI_Sve_SignExtendWideningLower;
+            break;
+        case NI_AdvSimd_SignExtendWideningUpper:
+            sveId = NI_Sve_SignExtendWideningUpper;
+            break;
         case NI_AdvSimd_Subtract:
         case NI_AdvSimd_Arm64_Subtract:
             sveId = NI_Sve_Subtract;
             break;
+        case NI_AdvSimd_ZeroExtendWideningLower:
+            sveId = NI_Sve_ZeroExtendWideningLower;
+            break;
+        case NI_AdvSimd_ZeroExtendWideningUpper:
+            sveId = NI_Sve_ZeroExtendWideningUpper;
+            break;
         case NI_Vector128_op_Equality:
             sveId = NI_Vector_op_Equality;
             break;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 2584b1363e9e98..6f604ce8b0833c 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2690,6 +2690,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_WidenLower:
         case NI_Vector64_WidenLower:
         case NI_Vector128_WidenLower:
         {
@@ -2701,6 +2702,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_WidenUpper:
        case NI_Vector64_WidenUpper:
        case
NI_Vector128_WidenUpper: { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index c9d482c0745bb5..b7082b1e8ee751 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -591,6 +591,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_ConvertToInt64: case NI_Sve_ConvertToUInt64: case NI_Sve_ConvertToDouble: + case NI_Sve_ConvertToDoubleUpper: { embOpt = emitTypeSize(intrinEmbMask.baseType) == EA_4BYTE ? INS_OPTS_S_TO_D : INS_OPTS_SCALABLE_D; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 67d0511e05ec25..c974bae1c6ebf6 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -95,6 +95,8 @@ HARDWARE_INTRINSIC(Vector, StoreAligned, HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) @@ -155,6 +157,7 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Sve, ConvertToDouble, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_fcvt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) 
+HARDWARE_INTRINSIC(Sve, ConvertToDoubleUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToSingle, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_invalid, INS_sve_fcvt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) From a04d52b4aef82a055cd23c571719ef06ec68831e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 15:23:04 -0700 Subject: [PATCH 050/120] Support Vector.Truncate --- src/coreclr/jit/gentree.cpp | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 3 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 86254b36958461..324ae5b8ad8627 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26611,6 +26611,7 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType { intrinsic = NI_AdvSimd_RoundToZero; } + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -29034,6 +29035,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_Arm64_RoundToNearest: sveId = NI_Sve_RoundToNearest; break; + case NI_AdvSimd_RoundToZero: + sveId = NI_Sve_RoundToZero; + break; case NI_AdvSimd_SignExtendWideningLower: sveId = NI_Sve_SignExtendWideningLower; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 6f604ce8b0833c..2743fb61f62acd 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2674,6 +2674,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_Truncate: case NI_Vector64_Truncate: case NI_Vector128_Truncate: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index c974bae1c6ebf6..3151896c6e0c08 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -95,6 +95,7 @@ HARDWARE_INTRINSIC(Vector, StoreAligned, HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Truncate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From 8376fc1867fc47d32c5ea51f0db5c020c69f73f7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 31 Mar 2025 15:52:09 -0700 Subject: [PATCH 051/120] Support Vector.ConditionalSelect --- src/coreclr/jit/gentree.cpp | 10 ++++++++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 3 files changed, 12 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 324ae5b8ad8627..750b29de417ec5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22766,6 +22766,16 @@ GenTree* Compiler::gtNewSimdCndSelNode( } return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) + if (simdSize > 16) + { + intrinsic = NI_Sve_ConditionalSelect; + op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); + } + else + { + intrinsic = NI_AdvSimd_BitwiseSelect; + } + intrinsic = (simdSize > 16) ? 
NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #else diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 2743fb61f62acd..1805ee250f7ce0 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -972,6 +972,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ConditionalSelect: case NI_Vector64_ConditionalSelect: case NI_Vector128_ConditionalSelect: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 3151896c6e0c08..ce8598964cdcf8 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -36,6 +36,7 @@ HARDWARE_INTRINSIC(Vector, AsVectorUInt64, HARDWARE_INTRINSIC(Vector, BitwiseAnd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, BitwiseOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, ConditionalSelect, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, ConvertToDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToInt32Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) From 1cebe09cb2b09c3480c288bc38b4398ffebeb3ae Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 1 Apr 2025 13:03:50 -0700 Subject: [PATCH 052/120] Support Vector.Create/Add Sve_DuplicateScalarToVector --- src/coreclr/jit/emitarm64.h | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 13 ++++++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 23 +++++++++++++++++++-- src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 +------ src/coreclr/jit/lowerarmarch.cpp | 11 ++++++++++ src/coreclr/jit/lsraarm64.cpp | 3 +++ 6 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index afc2852e00e133..5c6df58730be61 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -799,6 +799,8 @@ static bool isValidUimm_MultipleOf(ssize_t value) return isValidUimm(value / mod) && (value % mod == 0); } +public: + // Returns true if 'value' is a legal signed immediate with 'bits' number of bits. 
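+// Exposed publicly so that Lowering's containment checks (for example the SVE
+// DUP and INDEX immediate checks in lowerarmarch.cpp) can query these
+// predicates via emitter::isValidSimm directly.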
template static bool isValidSimm(ssize_t value) @@ -815,6 +817,8 @@ static bool isValidSimm_MultipleOf(ssize_t value) return isValidSimm(value / mod) && (value % mod == 0); } +private: + // Returns true if 'imm' is a valid broadcast immediate for some SVE DUP variants static bool isValidBroadcastImm(ssize_t imm, emitAttr laneSize) { diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 1805ee250f7ce0..32ceb47cadfbbb 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -553,6 +553,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = 7; break; + case NI_Sve_DuplicateScalarToVector: + immLowerBound = -128; + immUpperBound = 127; + break; + default: unreached(); } @@ -1105,6 +1110,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Create: + { + assert(sig->numArgs == 1); + + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + break; + } + case NI_Vector64_Create: case NI_Vector128_Create: { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index b7082b1e8ee751..cb746fcd0f0e19 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2659,11 +2659,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { int start = (int)intrin.op1->AsIntCon()->gtIconVal; int step = (int)intrin.op2->AsIntCon()->gtIconVal; - GetEmitter()->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitIns_R_I_I(ins, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else { - GetEmitter()->emitIns_R_R_R(INS_sve_index, emitTypeSize(intrin.baseType), targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } + break; + } + case NI_Sve_DuplicateScalarToVector: + { + if (op1Reg == REG_NA) + { + GetEmitter()->emitIns_R_I(ins, emitTypeSize(intrin.baseType), targetReg, intrin.op1->AsIntCon()->IconValue(), emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } + else + { + if (varTypeIsIntegral(intrin.op1)) + { + GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, 0, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + } } break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index ce8598964cdcf8..455ac7b16a4a4f 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -47,8 +47,6 @@ HARDWARE_INTRINSIC(Vector, ConvertToUInt32, HARDWARE_INTRINSIC(Vector, ConvertToUInt32Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) - - HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector, Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -67,22 +65,18 @@ HARDWARE_INTRINSIC(Vector, IsNegative, HARDWARE_INTRINSIC(Vector, IsPositive, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, IsPositiveInfinity, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, IsZero, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - HARDWARE_INTRINSIC(Vector, LessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, LessThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, LessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, LessThanOrEqualAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) - HARDWARE_INTRINSIC(Vector, LoadAligned, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LoadAlignedNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, LoadUnsafe, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - HARDWARE_INTRINSIC(Vector, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MaxNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - HARDWARE_INTRINSIC(Vector, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -100,7 +94,6 @@ HARDWARE_INTRINSIC(Vector, Truncate, HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) - //HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_Indices, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -207,6 +200,7 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, HARDWARE_INTRINSIC(Sve, Divide, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, DuplicateScalarToVector, -1, 1, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, DuplicateSelectedScalarToVector, -1, 2, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FloatingPointExponentialAccelerator, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fexpa, INS_invalid, INS_sve_fexpa, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index d25e8544760603..52aceb5084d2d0 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4403,6 +4403,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) MakeSrcContained(node, intrin.op5); } break; + case NI_Sve_DuplicateScalarToVector: + assert(hasImmediateOperand); + if (intrin.op1->IsCnsIntOrI()) + { + ssize_t iconValue = intrin.op1->AsIntCon()->IconValue(); + if (emitter::isValidSimm<8>(iconValue) || emitter::isValidSimm_MultipleOf<8, 256>(iconValue)) + { + MakeSrcContained(node, intrin.op1); + } + } + break; default: unreached(); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9af6bef2f17f19..55072165c39ab5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1726,6 +1726,9 @@ void LinearScan::BuildHWIntrinsicImmediate(GenTreeHWIntrinsic* intrinsicTree, co case NI_Sve_MultiplyAddRotateComplex: needBranchTargetReg = !intrin.op4->isContainedIntOrIImmed(); break; + case NI_Sve_DuplicateScalarToVector: + needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed(); + break; default: unreached(); From c626047e5151720f25242a5a1663e29d1e77d0e7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 1 Apr 2025 14:31:39 -0700 Subject: [PATCH 053/120] Support Vector.CreateSequence/Fix Sve_Index --- src/coreclr/jit/hwintrinsicarm64.cpp | 12 +++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 23 +++++++++++----- src/coreclr/jit/hwintrinsiclistarm64sve.h | 3 ++- src/coreclr/jit/lowerarmarch.cpp | 30 +++++++++++---------- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 32ceb47cadfbbb..15f53aa6ec97d6 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1626,6 +1626,18 @@ 
GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_CreateSequence:
+        {
+            // TODO-VL: Check if a similar check is needed at other places in this method.
+            if (simdSize > 16)
+            {
+                op2 = impPopStack().val;
+                op1 = impPopStack().val;
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize);
+            }
+            break;
+        }
+
         case NI_Vector_get_AllBitsSet:
         case NI_Vector64_get_AllBitsSet:
         case NI_Vector128_get_AllBitsSet:
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index cb746fcd0f0e19..957a6aa4f029cd 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -2652,18 +2652,29 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             }
             case NI_Sve_Index:
             {
-                // either both should be available or both not.
-                assert ((op1Reg == REG_NA) == (op2Reg == REG_NA));
-
-                if (op1Reg == REG_NA)
+                if ((op1Reg == REG_NA) && (op2Reg == REG_NA))
                 {
                     int start = (int)intrin.op1->AsIntCon()->gtIconVal;
                     int step = (int)intrin.op2->AsIntCon()->gtIconVal;
-                    GetEmitter()->emitIns_R_I_I(ins, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)));
+                    GetEmitter()->emitInsSve_R_I_I(ins, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)));
+                }
+                else if ((op1Reg != REG_NA) && (op2Reg != REG_NA))
+                {
+                    emitAttr scalarSize = emitActualTypeSize(node->GetSimdBaseType());
+                    GetEmitter()->emitInsSve_R_R_R(ins, scalarSize, targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)));
+                }
+                else if (op1Reg != REG_NA)
+                {
+                    assert(op2Reg == REG_NA);
+                    int step = (int)intrin.op2->AsIntCon()->gtIconVal;
+                    GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op1Reg, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)));
                 }
                 else
                 {
-                    GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)));
+                    assert(op1Reg == REG_NA);
+
+                    int start = (int)intrin.op1->AsIntCon()->gtIconVal;
+                    GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op2Reg, start, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)), INS_SCALABLE_OPTS_IMM_FIRST);
                 }
                 break;
             }
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 455ac7b16a4a4f..d5d3d9ae422258 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -48,6 +48,7 @@ HARDWARE_INTRINSIC(Vector, ConvertToUInt32Native,
 HARDWARE_INTRINSIC(Vector, ConvertToUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector, CreateSequence, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid,
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -247,7 +248,7 @@ HARDWARE_INTRINSIC(Sve, GetFfrSByte, HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(Sve, Index, -1, 2, {INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Index, -1, 2, {INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, {INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 52aceb5084d2d0..cbe42279008ea2 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1952,19 +1952,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) StoreFFRValue(node); break; } - case NI_Sve_Index: - { - int start = (int)node->Op(1)->AsIntCon()->IconValue(); - int step = 
(int)node->Op(1)->AsIntCon()->IconValue(); - bool encodableStart = ((-16 <= start) && (start <= 15)); - bool encodableStep = ((-16 <= step) && (step <= 15)); - if (encodableStart && encodableStep) - { - node->Op(1)->SetContained(); - node->Op(2)->SetContained(); - } - break; - } default: break; @@ -4404,7 +4391,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; case NI_Sve_DuplicateScalarToVector: - assert(hasImmediateOperand); + assert(!hasImmediateOperand); if (intrin.op1->IsCnsIntOrI()) { ssize_t iconValue = intrin.op1->AsIntCon()->IconValue(); @@ -4414,6 +4401,21 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } } break; + case NI_Sve_Index: + { + assert(!hasImmediateOperand); + assert(varTypeIsIntegral(intrin.op1)); + assert(varTypeIsIntegral(intrin.op2)); + if (intrin.op1->IsCnsIntOrI() && emitter::isValidSimm<5>(intrin.op1->AsIntCon()->IconValue())) + { + MakeSrcContained(node, intrin.op1); + } + if (intrin.op2->IsCnsIntOrI() && emitter::isValidSimm<5>(intrin.op2->AsIntCon()->IconValue())) + { + MakeSrcContained(node, intrin.op2); + } + break; + } default: unreached(); From 62a2d9f149b8870ec7c18e17fc1333b7187a3a6a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 2 Apr 2025 12:39:02 -0700 Subject: [PATCH 054/120] Support Vector.LeftShift/Add Sve_ShiftLeftLogicalImm --- src/coreclr/jit/gentree.cpp | 15 ++++++++++++++- src/coreclr/jit/hwintrinsicarm64.cpp | 12 ++++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 3 +++ src/coreclr/jit/lowerarmarch.cpp | 9 +++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 750b29de417ec5..a6f9fbb9e96eeb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20969,7 +20969,14 @@ GenTree* Compiler::gtNewSimdBinOpNode( op2 = gtNewOperNode(GT_NEG, TYP_INT, op2); } - op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize); + if (simdSize > 16) + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize); + } #endif // !TARGET_XARCH && !TARGET_ARM64 } break; @@ -29048,6 +29055,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N case NI_AdvSimd_RoundToZero: sveId = NI_Sve_RoundToZero; break; + case NI_AdvSimd_ShiftLogical: + sveId = NI_Sve_ShiftLeftLogical; + break; + case NI_AdvSimd_ShiftLeftLogical: + sveId = NI_Sve_ShiftLeftLogicalImm; + break; case NI_AdvSimd_SignExtendWideningLower: sveId = NI_Sve_SignExtendWideningLower; break; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 15f53aa6ec97d6..1d03e3c649adbe 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2279,6 +2279,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ShiftLeft: + case NI_Vector_op_LeftShift: + { + assert(sig->numArgs == 2); + + op2 = impPopStack().val; + op1 = impSIMDPopStack(); + + retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->SetAuxiliaryJitType(simdBaseJitType); + break; + } case NI_Vector64_op_LeftShift: case NI_Vector128_op_LeftShift: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index d5d3d9ae422258..5fc9cf661bd5a7 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ 
b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -86,6 +86,7 @@ HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, ShiftLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -107,6 +108,7 @@ HARDWARE_INTRINSIC(Vector, op_Division, HARDWARE_INTRINSIC(Vector, op_Equality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector, op_ExclusiveOr, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Inequality, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector, op_LeftShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -373,6 +375,7 @@ HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, HARDWARE_INTRINSIC(Sve, 
Scatter8BitWithByteOffsetsNarrowing, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ShiftLeftLogicalImm, -1, -1, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index cbe42279008ea2..8061b878621eeb 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4416,6 +4416,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } + case NI_Sve_ShiftLeftLogicalImm: + { + assert(!hasImmediateOperand); + if (intrin.op2->IsCnsIntOrI() && emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), emitTypeSize(intrin.baseType), false)) + { + MakeSrcContained(node, intrin.op2); + } + break; + } default: unreached(); From cd17e414217c621702fd14e3e3fbcbb1f8ddb387 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 2 Apr 2025 22:32:54 -0700 Subject: [PATCH 055/120] Support Vector.ShiftRightLogical/RightShift Add Sve.ShiftRight*Imm --- src/coreclr/jit/gentree.cpp | 7 +++++++ src/coreclr/jit/hwintrinsicarm64.cpp | 14 ++++++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++++ src/coreclr/jit/lowerarmarch.cpp | 10 ++++++++++ 4 files changed, 35 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a6f9fbb9e96eeb..d2217948fc8a95 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -29056,11 
+29056,21 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
             sveId = NI_Sve_RoundToZero;
             break;
         case NI_AdvSimd_ShiftLogical:
+        case NI_AdvSimd_ShiftArithmetic:
             sveId = NI_Sve_ShiftLeftLogical;
             break;
         case NI_AdvSimd_ShiftLeftLogical:
             sveId = NI_Sve_ShiftLeftLogicalImm;
             break;
+        case NI_AdvSimd_ShiftRightArithmetic:
+            sveId = NI_Sve_ShiftRightArithmeticImm;
+            break;
+        case NI_AdvSimd_ShiftRightLogical:
+            sveId = NI_Sve_ShiftRightLogicalImm;
+            break;
         case NI_AdvSimd_SignExtendWideningLower:
             sveId = NI_Sve_SignExtendWideningLower;
             break;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 1d03e3c649adbe..fb2fb417de0809 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2303,6 +2303,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_ShiftRightLogical:
+        case NI_Vector_op_RightShift:
+        {
+            assert(sig->numArgs == 2);
+            // ShiftRightLogical is always a zero-fill shift; only op_RightShift becomes arithmetic for signed element types.
+            genTreeOps op = (intrinsic == NI_Vector_ShiftRightLogical) ? GT_RSZ : (varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack();
+
+            retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize);
+            retNode->AsHWIntrinsic()->SetAuxiliaryJitType(simdBaseJitType);
+            break;
+        }
+
         case NI_Vector64_op_RightShift:
         case NI_Vector128_op_RightShift:
         {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 5fc9cf661bd5a7..e32bb4c59fd385 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -87,6 +87,7 @@ HARDWARE_INTRINSIC(Vector, Negate,
 HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, ShiftLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, ShiftRightLogical, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -111,6 +112,7 @@ HARDWARE_INTRINSIC(Vector, op_Inequality,
 HARDWARE_INTRINSIC(Vector, op_LeftShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, op_Multiply, -1, 2, {INS_invalid,
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, op_RightShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_Subtraction, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_UnaryNegation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, op_UnaryPlus, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -378,7 +380,9 @@ HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, HARDWARE_INTRINSIC(Sve, ShiftLeftLogicalImm, -1, -1, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticImm, -1, -1, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, ShiftRightLogicalImm, -1, -1, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 8061b878621eeb..d2f692ed9e6acf 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4425,6 +4425,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } + case NI_Sve_ShiftRightArithmeticImm: + case NI_Sve_ShiftRightLogicalImm: + { + assert(!hasImmediateOperand); + if (intrin.op2->IsCnsIntOrI() && emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), emitTypeSize(intrin.baseType), true)) + { + MakeSrcContained(node, intrin.op2); + } + break; + } default: unreached(); From f9567fd95905ab16ed4ce257cfafc0643a20a42d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 2 Apr 2025 23:04:03 -0700 Subject: [PATCH 056/120] Support Vector.ToScalar --- src/coreclr/jit/hwintrinsicarm64.cpp | 12 ++++++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 - src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index fb2fb417de0809..f910c3e4b542c5 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1638,6 +1638,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_ToScalar: + { + if (simdSize > 16) + { + op1 = impSIMDPopStack(); + + // Even for SVE, to scalar always would fetch 0th element from the overlapping SIMD register. 
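+                // This relies on the low 128 bits of each SVE Z register aliasing the
+                // corresponding NEON V register, so reading element 0 through a 16-byte
+                // view is valid regardless of the actual vector length.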
+                retNode = gtNewSimdToScalarNode(genActualType(simdBaseType), op1, simdBaseJitType, 16);
+            }
+            break;
+        }
+
         case NI_Vector_get_AllBitsSet:
         case NI_Vector64_get_AllBitsSet:
         case NI_Vector128_get_AllBitsSet:
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 957a6aa4f029cd..043592d1119c9a 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -1858,7 +1858,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             break;
         }

-        //case NI_Vector_ToScalar:
         case NI_Vector64_ToScalar:
         case NI_Vector128_ToScalar:
         {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index e32bb4c59fd385..178fbc43060237 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -93,6 +93,7 @@ HARDWARE_INTRINSIC(Vector, StoreAligned,
 HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar)
 HARDWARE_INTRINSIC(Vector, Truncate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

From 9145170da8a969bb877ab189df5f9d8dfbb3bdb9 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 2 Apr 2025 23:18:25 -0700
Subject: [PATCH 057/120] Support Vector.Sum

---
 src/coreclr/jit/gentree.cpp               | 6 ++++++
 src/coreclr/jit/hwintrinsicarm64.cpp      | 1 +
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 +
 3 files changed, 8 insertions(+)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index d2217948fc8a95..e9445ad661af7c 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -26362,6 +26362,12 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
         return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
 #elif defined(TARGET_ARM64)
+    if (simdSize > 16)
+    {
+        tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Sve_AddAcross, simdBaseJitType, simdSize);
+        return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 16);
+    }
+
     switch (simdBaseType)
     {
         case TYP_BYTE:
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index f910c3e4b542c5..2f575b8bc0646d 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2729,6 +2729,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             break;
         }

+        case NI_Vector_Sum:
         case NI_Vector64_Sum:
         case NI_Vector128_Sum:
         {
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index 178fbc43060237..dc1640f66f9c8e 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -93,6 +93,7 @@ HARDWARE_INTRINSIC(Vector, StoreAligned,
 HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, Sum, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar)
 HARDWARE_INTRINSIC(Vector, Truncate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

From 4a76f71dd5f06195d3f50a944e6e234c3271918d Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 4 Apr 2025 16:18:24 -0700
Subject: [PATCH 058/120] build errors fix

---
 src/coreclr/jit/gentree.cpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index e9445ad661af7c..b2a00d99b06f48 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -22068,7 +22068,12 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
     if (intrinsic != NI_Illegal)
     {
 #if defined(FEATURE_MASKED_HW_INTRINSICS)
-        if (wrapInCvtm && (lookupType != type))
+
+        bool wrapCallInConvertVectorToMask = (lookupType != type);
+#if defined(TARGET_ARM64)
+        wrapCallInConvertVectorToMask &= wrapInCvtm;
+#endif
+        if (wrapCallInConvertVectorToMask)
         {
             assert(varTypeIsMask(lookupType));
             GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -28956,6 +28961,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
     }
 }

+#ifdef TARGET_ARM64
+
 //------------------------------------------------------------------------------
 // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable
 //
@@ -29108,6 +29115,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
     return sveId;
 }

+#endif
+
 //------------------------------------------------------------------------------
 // GetHWIntrinsicIdForUnOp: Returns intrinsic ID based on the oper, base type, and simd size
 //
@@ -30051,7 +30060,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
 #endif // !TARGET_ARM64
     else
     {
-        assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == Compiler::compVectorTLength));
+        bool validSimdSize = (simdSize == 8) || (simdSize == 12) || (simdSize == 16);
+#if defined(TARGET_ARM64)
+        validSimdSize |= (simdSize == Compiler::compVectorTLength);
+#endif
+        assert(validSimdSize);

 #if defined(TARGET_ARM64)
         assert(!isScalar || (simdSize == 8));

From a102b6f36bf11e863515f91eb7b8e75af5913bcf Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 4 Apr 2025 16:26:58 -0700
Subject: [PATCH 059/120] Make GetScalableHWIntrinsicId() available on all platforms to avoid #ifdef in callers

---
 src/coreclr/jit/gentree.cpp | 47 +++++++++++++++----------------------
 src/coreclr/jit/gentree.h   |  2 --
 2 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index b2a00d99b06f48..a7b5c5b85ea517 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20866,7 +20866,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
         intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_AbsScalar : NI_AdvSimd_Arm64_Abs;
     }

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
@@ -21567,11 +21567,12 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
     {
         intrinsic = NI_AdvSimd_Ceiling;
     }
-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
@@ -21997,10 +21998,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type,
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-#if defined(TARGET_ARM64)
     hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, hwIntrinsicID);
-#endif
-
     assert(hwIntrinsicID != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize);
 }
@@ -22542,9 +22540,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
         }
     }

-#if defined(TARGET_ARM64)
     intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
-#endif
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -22731,9 +22727,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
         }
     }

-#if defined(TARGET_ARM64)
     intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
-#endif
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -23468,11 +23462,12 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType
     {
         intrinsic = NI_AdvSimd_Floor;
     }
-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
@@ -23528,9 +23523,7 @@ GenTree* Compiler::gtNewSimdFmaNode(
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-#if defined(TARGET_ARM64)
     intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
-#endif
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
@@ -24777,9 +24770,8 @@ GenTree* Compiler::gtNewSimdMaxNativeNode(

     if (intrinsic != NI_Illegal)
     {
-#ifdef TARGET_ARM64
         intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
-#endif
+
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }

@@ -25041,9 +25033,8 @@ GenTree* Compiler::gtNewSimdMinNativeNode(

     if (intrinsic != NI_Illegal)
     {
-#ifdef TARGET_ARM64
         intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
-#endif
+
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }

@@ -25623,11 +25614,12 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType
     {
         intrinsic = NI_AdvSimd_RoundToNearest;
     }
-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
@@ -26639,11 +26631,12 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType
     {
         intrinsic = NI_AdvSimd_RoundToZero;
     }
-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

+    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
@@ -28961,15 +28954,17 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
     }
 }

-#ifdef TARGET_ARM64
 //------------------------------------------------------------------------------
 // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable
 //
 NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id)
 {
-    //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places
     NamedIntrinsic sveId = id;
+
+#ifdef TARGET_ARM64
+    //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places
+
     if (simdSize > 16)
     {
         switch (id)
@@ -29112,11 +29107,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N
     }
     // Make sure if we are using VL SIMD, we are not generating AdvSimd/NEON intrinsics
     assert((simdSize <= 16) || (sveId < FIRST_NI_AdvSimd) || (sveId > LAST_NI_AdvSimd));
+#endif // TARGET_ARM64
+
     return sveId;
 }

-#endif
-
 //------------------------------------------------------------------------------
 // GetHWIntrinsicIdForUnOp: Returns intrinsic ID based on the oper, base type, and simd size
 //
@@ -29210,9 +29205,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
         }
     }

-#if defined(TARGET_ARM64)
     id = GetScalableHWIntrinsicId(simdSize, id);
-#endif
+
     return id;
 }

@@ -30003,9 +29997,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
         }
     }

-#ifdef TARGET_ARM64
     id = GetScalableHWIntrinsicId(simdSize, id);
-#endif
+
     return id;
 }

@@ -30378,9 +30371,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
         }
     }

-#if defined(TARGET_ARM64)
     id = GetScalableHWIntrinsicId(simdSize, id);
-#endif

     return id;
 }
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index b3eebb450bb614..f0e5a9c2139052 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -6571,9 +6571,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

     static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);

-#ifdef TARGET_ARM64
     static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id);
-#endif

     static NamedIntrinsic GetHWIntrinsicIdForUnOp(
         Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar);

From eead7d781d0c4ca0dd84300d3aec187be30e22fa Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Mon, 7 Apr 2025 13:55:23 -0700
Subject: [PATCH 060/120] For unroll strategy, continue using 16B size

---
 src/coreclr/jit/compiler.h | 17 ++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index d1228417cde2f3..f30bcec46029db 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9357,9 +9357,15 @@ class Compiler
 #elif defined(TARGET_ARM64)
         if (FP_REGSIZE_BYTES < Compiler::compVectorTLength)
         {
-            return (size >= Compiler::compVectorTLength) ? Compiler::compVectorTLength : 0;
+            if (size >= Compiler::compVectorTLength)
+            {
+                return Compiler::compVectorTLength;
+            }
+        }
+        else
+        {
+            assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES);
         }
-        assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES);
         return (size >= FP_REGSIZE_BYTES) ? FP_REGSIZE_BYTES : 0;
 #else
         assert(!"roundDownSIMDSize() unimplemented on target arch");
@@ -9526,7 +9532,12 @@ class Compiler
 #if defined(FEATURE_SIMD)
         if (canUseSimd)
         {
-            maxRegSize = getPreferredVectorByteLength();
+#if defined(TARGET_ARM64)
+            // For now, just use SIMD register size for unroll threshold
+            // decisions
+            //maxRegSize = getPreferredVectorByteLength();
+            maxRegSize = FP_REGSIZE_BYTES;
+#endif // TARGET_ARM64

 #if defined(TARGET_XARCH)
             assert(maxRegSize <= ZMM_REGSIZE_BYTES);

From 6d139ee0ee1db4ebee31e9f25c412ab98c4cfb51 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Mon, 7 Apr 2025 16:47:59 -0700
Subject: [PATCH 061/120] Fix some errors for Vector_op_Equality

---
 src/coreclr/jit/gentree.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index a7b5c5b85ea517..991a86e3c541da 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -22443,6 +22443,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
         else
         {
             assert(simdSize > 16);
+
+            intrinsic = NI_Vector_op_Equality;

             GenTree* cmpResult =
                 gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
@@ -22478,6 +22480,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(

             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, ulong>() == Vector128<ulong>.AllBitsSet
+            // TODO-VL: Such checks might not work for DOTNET_MinVectorLengthForSve, where we
+            // set DOTNET_MinVectorLengthForSve=16 for testing purposes.
             if (simdSize == 8)
             {
                 intrinsic = NI_Vector64_op_Equality;
@@ -22488,6 +22492,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
             }
             if (simdSize > 16)
             {
+                intrinsic = NI_Vector_op_Equality;
+
                 GenTree* cmpResult =
                     gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));

From 715a2c07ec8312825c4f131e65f77ec01c913945 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 8 Apr 2025 10:42:00 -0700
Subject: [PATCH 062/120] Disable optimizations for unroll/memcopy, etc.
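
For now, most of the VL-driven block-op paths (memmove/struct-copy unrolling,
store coalescing) fall back to the 16-byte NEON code. As a rough sketch of what
the commented-out roundDownSIMDSize() logic computed, assuming a hypothetical
256-bit machine (compVectorTLength == 32):

    // roundDownSIMDSize(40) -> 32   (request covers a full VL register)
    // roundDownSIMDSize(24) -> 16   (falls back to the 16-byte NEON size)
    // roundDownSIMDSize(8)  -> 0    (too small for any SIMD register)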
---
 src/coreclr/jit/codegenarm64.cpp     |  2 ++
 src/coreclr/jit/compiler.h           | 31 +++++++++++++++-------------
 src/coreclr/jit/hwintrinsicarm64.cpp |  8 +++++++
 src/coreclr/jit/lower.cpp            | 10 ++++++---
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 975f771f71b118..bdb9af3b957811 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2540,6 +2540,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
                 memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd32_t));
                 hnd = emit->emitSimd32Const(constValue);
                 emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0);
+                //emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0);
+                //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B);
             }
         }
         else
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index f30bcec46029db..06a3c9b6c70d67 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9243,11 +9243,12 @@ class Compiler
             return XMM_REGSIZE_BYTES;
         }
 #elif defined(TARGET_ARM64)
-        if (compExactlyDependsOn(InstructionSet_Sve_Arm64))
-        {
-            return Compiler::compVectorTLength;
-        }
-        else if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
+        //if (compExactlyDependsOn(InstructionSet_Sve_Arm64))
+        //{
+        //    return Compiler::compVectorTLength;
+        //}
+        //else
+        if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
         {
             return FP_REGSIZE_BYTES;
         }
@@ -9355,14 +9356,16 @@ class Compiler
         // Return 0 if size is even less than XMM, otherwise - XMM
         return (size >= XMM_REGSIZE_BYTES) ? XMM_REGSIZE_BYTES : 0;
 #elif defined(TARGET_ARM64)
-        if (FP_REGSIZE_BYTES < Compiler::compVectorTLength)
-        {
-            if (size >= Compiler::compVectorTLength)
-            {
-                return Compiler::compVectorTLength;
-            }
-        }
-        else
+        //if (FP_REGSIZE_BYTES < Compiler::compVectorTLength)
+        //{
+        //    if (size >= Compiler::compVectorTLength)
+        //    {
+        //        return Compiler::compVectorTLength;
+        //    }
+        //}
+        //else
+        // TODO-VL: For now, disable most of the optimizations like memmove, struct copy,
+        // etc. for VL
         {
             assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES);
         }
@@ -9614,7 +9617,7 @@ class Compiler
     bool structSizeMightRepresentSIMDType(size_t structSize)
     {
 #ifdef FEATURE_SIMD
-        return (structSize >= getMinVectorByteLength()) && (structSize <= getMaxVectorByteLength());
+        return (structSize >= getMinVectorByteLength()) && (structSize <= getVectorTByteLength());
 #else
         return false;
 #endif // FEATURE_SIMD
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 2f575b8bc0646d..06dc54512772dc 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -118,6 +118,14 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
         {
             return InstructionSet_Vector;
         }
+        else if (strncmp(className, "Vector256", 9) == 0)
+        {
+            return InstructionSet_ILLEGAL;
+        }
+        else if (strncmp(className, "Vector512", 9) == 0)
+        {
+            return InstructionSet_ILLEGAL;
+        }
         else if (strncmp(className, "Vector", 6) == 0)
         {
             return InstructionSet_Vector;
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 27428ef3dab357..43613255be3f2b 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -9531,7 +9531,7 @@ bool Lowering::GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescing
 //
 void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
 {
-// LA, RISC-V and ARM32 more likely to recieve a terrible performance hit from
+// LA, RISC-V and ARM32 more likely to receive a terrible performance hit from
 // unaligned accesses making this optimization questionable.
 #if defined(TARGET_XARCH) || defined(TARGET_ARM64)
     if (!comp->opts.OptimizationEnabled())
@@ -9753,7 +9753,7 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
             }
             return;

-#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+#if defined(TARGET_AMD64)
         case TYP_SIMD16:
             if (comp->getPreferredVectorByteLength() >= 32)
             {
@@ -9771,7 +9771,11 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
             }
             tryReusingPrevValue = true;
             break;
-#endif // TARGET_AMD64 || TARGET_ARM64
+#elif defined(TARGET_ARM64) // TARGET_AMD64
+        case TYP_SIMD16:
+            tryReusingPrevValue = true;
+            break;
+#endif // TARGET_AMD64
 #endif // FEATURE_HW_INTRINSICS
 #endif // TARGET_64BIT

From b5d446034b8741abeb7e41bf6d09b682cbefc64a Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 8 Apr 2025 11:16:02 -0700
Subject: [PATCH 063/120] Add comments in the runtime where the correct Vector<T> size should be reflected

---
 src/coreclr/vm/methodtablebuilder.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp
index 16186cb5b08f1a..3bc56a7f91b7c6 100644
--- a/src/coreclr/vm/methodtablebuilder.cpp
+++ b/src/coreclr/vm/methodtablebuilder.cpp
@@ -1127,7 +1127,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
 {
     STANDARD_VM_CONTRACT;

-#if defined(TARGET_X86) || defined(TARGET_AMD64)
+#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64)
     if (!bmtProp->fIsIntrinsicType)
         return false;

@@ -1146,6 +1146,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
     CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags();
     uint32_t numInstanceFieldBytes = 16;

+#if defined(TARGET_X86) || defined(TARGET_AMD64)
     if (CPUCompileFlags.IsSet(InstructionSet_VectorT512))
     {
         numInstanceFieldBytes = 64;
@@ -1154,6 +1155,15 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
     {
         numInstanceFieldBytes = 32;
     }
+#elif defined(TARGET_ARM64)
+    if (CPUCompileFlags.IsSet(InstructionSet_Sve_Arm64))
+    {
+        // TODO-VL: This should use GetSveLengthFromOS()
+        // Probably use CLRConfig::XXX environment variable
+        // for testing
+        numInstanceFieldBytes = 32;
+    }
+#endif // TARGET_X86 || TARGET_AMD64 || TARGET_ARM64

     if (numInstanceFieldBytes != 16)
     {
@@ -1166,7 +1176,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
         return true;
     }

-#endif // TARGET_X86 || TARGET_AMD64
+#endif // TARGET_X86 || TARGET_AMD64 || TARGET_ARM64

     return false;
 }

From 15bb8a407ab77b6bfec4c7fa50e9763c3c004fbe Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 8 Apr 2025 12:03:22 -0700
Subject: [PATCH 064/120] Fix bug for Vector.ConvertToDouble

---
 src/coreclr/jit/hwintrinsicarm64.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 06dc54512772dc..9fb26c0bba4aeb 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1008,6 +1008,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble;

+            intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic);
+
             op1     = impSIMDPopStack();
             retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             break;

From 9e99f270563b1323811ae78447f4ce3aba69e7eb Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 8 Apr 2025 14:38:13 -0700
Subject: [PATCH 065/120] Add jit-ee GetTargetVectorLength()

---
 src/coreclr/inc/corinfoinstructionset.h       | 40 ++++++++++---------
 src/coreclr/inc/corjit.h                      |  2 +
 src/coreclr/inc/icorjitinfoimpl_generated.h   |  2 +
 src/coreclr/inc/jiteeversionguid.h            | 10 ++---
 src/coreclr/jit/ICorJitInfo_names_generated.h |  1 +
 .../jit/ICorJitInfo_wrapper_generated.hpp     |  8 ++++
 .../Runtime/ReadyToRunInstructionSetHelper.cs |  1 +
 .../tools/Common/JitInterface/CorInfoImpl.cs  |  8 ++++
 .../JitInterface/CorInfoImpl_generated.cs     | 22 ++++++++--
 .../JitInterface/CorInfoInstructionSet.cs     | 40 +++++++++++--------
 .../ThunkGenerator/InstructionSetDesc.txt     |  3 ++
 .../ThunkGenerator/ThunkInput.txt             |  1 +
 .../aot/jitinterface/jitinterface_generated.h |  9 +++++
 .../tools/superpmi/superpmi-shared/lwmlist.h  |  1 +
 .../superpmi-shared/methodcontext.cpp         | 23 +++++++++++
 .../superpmi/superpmi-shared/methodcontext.h  |  5 +++
 .../superpmi-shim-collector/icorjitinfo.cpp   |  8 ++++
 .../icorjitinfo_generated.cpp                 |  6 +++
 .../icorjitinfo_generated.cpp                 |  5 +++
 .../tools/superpmi/superpmi/icorjitinfo.cpp   |  7 ++++
 src/coreclr/vm/jitinterface.cpp               | 26 ++++++++++++
 src/coreclr/vm/jitinterface.h                 |  2 +-
 22 files changed, 186 insertions(+), 44 deletions(-)

diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index 79c22d4767895d..92d6494e8c88a6 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -25,23 +25,23 @@ enum CORINFO_InstructionSet
     InstructionSet_Sha1=7,
    InstructionSet_Sha256=8,
     InstructionSet_Atomics=9,
-    InstructionSet_Vector64=10,
-    InstructionSet_Vector128=11,
-    InstructionSet_Dczva=12,
-    InstructionSet_Rcpc=13,
-    InstructionSet_VectorT128=14,
-    InstructionSet_Rcpc2=15,
-    InstructionSet_Sve=16,
-    InstructionSet_ArmBase_Arm64=17,
-    InstructionSet_AdvSimd_Arm64=18,
-    InstructionSet_Aes_Arm64=19,
-    InstructionSet_Crc32_Arm64=20,
-    InstructionSet_Dp_Arm64=21,
-    InstructionSet_Rdm_Arm64=22,
-    InstructionSet_Sha1_Arm64=23,
-    InstructionSet_Sha256_Arm64=24,
-    InstructionSet_Sve_Arm64=25,
-    InstructionSet_Vector=26,
+    InstructionSet_Vector=10,
+    InstructionSet_Vector64=11,
+    InstructionSet_Vector128=12,
+    InstructionSet_Dczva=13,
+    InstructionSet_Rcpc=14,
+    InstructionSet_VectorT128=15,
+    InstructionSet_Rcpc2=16,
+    InstructionSet_Sve=17,
+    InstructionSet_ArmBase_Arm64=18,
+    InstructionSet_AdvSimd_Arm64=19,
+    InstructionSet_Aes_Arm64=20,
+    InstructionSet_Crc32_Arm64=21,
+    InstructionSet_Dp_Arm64=22,
+    InstructionSet_Rdm_Arm64=23,
+    InstructionSet_Sha1_Arm64=24,
+    InstructionSet_Sha256_Arm64=25,
+    InstructionSet_Sve_Arm64=26,
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
     InstructionSet_X86Base=1,
@@ -443,6 +443,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
     if (resultflags.HasInstructionSet(InstructionSet_Sve) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
         resultflags.RemoveInstructionSet(InstructionSet_Sve);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector) && !resultflags.HasInstructionSet(InstructionSet_Sve))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector);
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
     if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
@@ -861,8 +863,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "Sha256_Arm64";
         case InstructionSet_Atomics :
             return "Atomics";
-        case InstructionSet_Vector:
-            return "Vector`1";
+        case InstructionSet_Vector :
+            return "Vector";
         case InstructionSet_Vector64 :
             return "Vector64";
         case InstructionSet_Vector128 :
diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h
index d8e6a774784f52..a29f81767804f3 100644
--- a/src/coreclr/inc/corjit.h
+++ b/src/coreclr/inc/corjit.h
@@ -438,6 +438,8 @@ class ICorJitInfo : public ICorDynamicInfo
     //
     virtual uint32_t getExpectedTargetArchitecture() = 0;

+    virtual uint32_t getTargetVectorLength() = 0;
+
     // Fetches extended flags for a particular compilation instance. Returns
     // the number of bytes written to the provided buffer.
     virtual uint32_t getJitFlags(
diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h
index 08b1004d4642d3..ceaca533781a4d 100644
--- a/src/coreclr/inc/icorjitinfoimpl_generated.h
+++ b/src/coreclr/inc/icorjitinfoimpl_generated.h
@@ -742,6 +742,8 @@ uint16_t getRelocTypeHint(
     void* target) override;

 uint32_t getExpectedTargetArchitecture() override;

+uint32_t getTargetVectorLength() override;
+
 uint32_t getJitFlags(
     CORJIT_FLAGS* flags,
     uint32_t sizeInBytes) override;
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index ca025f62a7ec4d..13edca4df1d92f 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -37,11 +37,11 @@

 #include

-constexpr GUID JITEEVersionIdentifier = { /* 4463d6ac-dfcb-4ab0-a941-c53b56089b7c */
-    0x4463d6ac,
-    0xdfcb,
-    0x4ab0,
-    {0xa9, 0x41, 0xc5, 0x3b, 0x56, 0x08, 0x9b, 0x7c}
+constexpr GUID JITEEVersionIdentifier = { /* dd603e43-c783-40e0-b7da-42585a9befb7 */
+    0xdd603e43,
+    0xc783,
+    0x40e0,
+    {0xb7, 0xda, 0x42, 0x58, 0x5a, 0x9b, 0xef, 0xb7}
 };

 #endif // JIT_EE_VERSIONING_GUID_H
diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h
index 94e244c0749bfa..94a39391b0c873 100644
--- a/src/coreclr/jit/ICorJitInfo_names_generated.h
+++ b/src/coreclr/jit/ICorJitInfo_names_generated.h
@@ -179,6 +179,7 @@ DEF_CLR_API(recordCallSite)
 DEF_CLR_API(recordRelocation)
 DEF_CLR_API(getRelocTypeHint)
 DEF_CLR_API(getExpectedTargetArchitecture)
+DEF_CLR_API(getTargetVectorLength)
 DEF_CLR_API(getJitFlags)
 DEF_CLR_API(getSpecialCopyHelper)

diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
index 9c7e6c1099826d..5a80d4b66b5042 100644
--- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
+++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
@@ -1736,6 +1736,14 @@ uint32_t WrapICorJitInfo::getExpectedTargetArchitecture()
     return temp;
 }

+uint32_t WrapICorJitInfo::getTargetVectorLength()
+{
+    API_ENTER(getTargetVectorLength);
+    uint32_t temp = wrapHnd->getTargetVectorLength();
+    API_LEAVE(getTargetVectorLength);
+    return temp;
+}
+
 uint32_t WrapICorJitInfo::getJitFlags(
     CORJIT_FLAGS* flags,
     uint32_t sizeInBytes)
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
index 5e50f7cc00b7bf..1a28b0fcc2bac5 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
@@ -40,6 +40,7 @@ public static class ReadyToRunInstructionSetHelper
                         case InstructionSet.ARM64_Sha256: return ReadyToRunInstructionSet.Sha256;
                         case InstructionSet.ARM64_Sha256_Arm64: return ReadyToRunInstructionSet.Sha256;
                         case InstructionSet.ARM64_Atomics: return ReadyToRunInstructionSet.Atomics;
+                        case InstructionSet.ARM64_Vector: return null;
                         case InstructionSet.ARM64_Vector64: return null;
                         case InstructionSet.ARM64_Vector128: return null;
                         case InstructionSet.ARM64_Dczva: return null;
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
index ebbb614841f987..6c36d6e6a9a153 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -4124,6 +4124,14 @@ private ushort getRelocTypeHint(void* target)
             }
         }

+#pragma warning disable CA1822 // Mark members as static
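+        // For AOT (crossgen2) the SVE vector length of the eventual target machine is
+        // not knowable at compile time, so this stub reports 0 for now (see below).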
+        private uint getTargetVectorLength()
+#pragma warning restore CA1822 // Mark members as static
+        {
+            // Temporary. Can use Sve.GetActiveElementCount or equivalent
+            return 0;
+        }
+
         private uint getExpectedTargetArchitecture()
         {
             TargetArchitecture arch = _compilation.TypeSystemContext.Target.Architecture;
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
index 91df884c58272c..b3838e11660f99 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
@@ -2590,6 +2590,21 @@ private static uint _getExpectedTargetArchitecture(IntPtr thisHandle, IntPtr* pp
             }
         }

+        [UnmanagedCallersOnly]
+        private static uint _getTargetVectorLength(IntPtr thisHandle, IntPtr* ppException)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                return _this.getTargetVectorLength();
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+                return default;
+            }
+        }
+
         [UnmanagedCallersOnly]
         private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_FLAGS* flags, uint sizeInBytes)
         {
@@ -2623,7 +2638,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_

         private static IntPtr GetUnmanagedCallbacks()
         {
-            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 177);
+            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 178);

             callbacks[0] = (delegate* unmanaged)&_isIntrinsic;
             callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage;
@@ -2800,8 +2815,9 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[172] = (delegate* unmanaged)&_recordRelocation;
             callbacks[173] = (delegate* unmanaged)&_getRelocTypeHint;
             callbacks[174] = (delegate* unmanaged)&_getExpectedTargetArchitecture;
-            callbacks[175] = (delegate* unmanaged)&_getJitFlags;
-            callbacks[176] = (delegate* unmanaged)&_getSpecialCopyHelper;
+            callbacks[175] = (delegate* unmanaged)&_getTargetVectorLength;
+            callbacks[176] = (delegate* unmanaged)&_getJitFlags;
+            callbacks[177] = (delegate* unmanaged)&_getSpecialCopyHelper;

             return (IntPtr)callbacks;
         }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
index 7752d65befc82c..ce27a5a1ac6c2a 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
@@ -25,6 +25,7 @@ public enum InstructionSet
         ARM64_Sha1 = InstructionSet_ARM64.Sha1,
         ARM64_Sha256 = InstructionSet_ARM64.Sha256,
         ARM64_Atomics = InstructionSet_ARM64.Atomics,
+        ARM64_Vector = InstructionSet_ARM64.Vector,
         ARM64_Vector64 = InstructionSet_ARM64.Vector64,
         ARM64_Vector128 = InstructionSet_ARM64.Vector128,
         ARM64_Dczva = InstructionSet_ARM64.Dczva,
@@ -203,22 +204,23 @@ public enum InstructionSet_ARM64
         Sha1 = 7,
         Sha256 = 8,
         Atomics = 9,
-        Vector64 = 10,
-        Vector128 = 11,
-        Dczva = 12,
-        Rcpc = 13,
-        VectorT128 = 14,
-        Rcpc2 = 15,
-        Sve = 16,
-        ArmBase_Arm64 = 17,
-        AdvSimd_Arm64 = 18,
-        Aes_Arm64 = 19,
-        Crc32_Arm64 = 20,
-        Dp_Arm64 = 21,
-        Rdm_Arm64 = 22,
-        Sha1_Arm64 = 23,
-        Sha256_Arm64 = 24,
-        Sve_Arm64 = 25,
+        Vector = 10,
+        Vector64 = 11,
+        Vector128 = 12,
+        Dczva = 13,
+        Rcpc = 14,
+        VectorT128 = 15,
+        Rcpc2 = 16,
+        Sve = 17,
+        ArmBase_Arm64 = 18,
+        AdvSimd_Arm64 = 19,
+        Aes_Arm64 = 20,
+        Crc32_Arm64 = 21,
+        Dp_Arm64 = 22,
+        Rdm_Arm64 = 23,
+        Sha1_Arm64 = 24,
+        Sha256_Arm64 = 25,
+        Sve_Arm64 = 26,
     }

     public enum InstructionSet_X64
@@ -502,6 +504,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS
             case TargetArchitecture.ARM64:
                 switch (input)
                 {
+                    case InstructionSet.ARM64_Vector: return InstructionSet.ARM64_Sve;
                     case InstructionSet.ARM64_Vector64: return InstructionSet.ARM64_AdvSimd;
                     case InstructionSet.ARM64_Vector128: return InstructionSet.ARM64_AdvSimd;
                 }
@@ -595,6 +598,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                     resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
                 if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve))
                     resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
+                if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector))
+                    resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
                 break;

             case TargetArchitecture.X64:
@@ -1028,6 +1033,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                     resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128);
                 if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
                     resultflags.AddInstructionSet(InstructionSet.ARM64_Sve);
+                if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve))
+                    resultflags.AddInstructionSet(InstructionSet.ARM64_Vector);
                 break;

             case TargetArchitecture.X64:
@@ -1408,6 +1415,7 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
                 yield return new InstructionSetInfo("sha1", "Sha1", InstructionSet.ARM64_Sha1, true);
                 yield return new InstructionSetInfo("sha2", "Sha256", InstructionSet.ARM64_Sha256, true);
                 yield return new InstructionSetInfo("lse", "", InstructionSet.ARM64_Atomics, true);
+                yield return new InstructionSetInfo("Vector", "", InstructionSet.ARM64_Vector, false);
                 yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false);
                 yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
                 yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
index d206c3056f8133..ffbea70f3490bb 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
@@ -209,6 +209,7 @@ instructionset ,ARM64 ,Rdm , ,24 ,Rdm
 instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1
 instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2
 instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse
+instructionset ,ARM64 , , , ,Vector ,
 instructionset ,ARM64 , , , ,Vector64 ,
 instructionset ,ARM64 , , , ,Vector128 ,
 instructionset ,ARM64 , , , ,Dczva ,
@@ -227,6 +228,7 @@ instructionset64bit,ARM64 ,Sha1
 instructionset64bit,ARM64 ,Sha256
 instructionset64bit,ARM64 ,Sve

+vectorinstructionset,ARM64,Vector
 vectorinstructionset,ARM64,Vector64
 vectorinstructionset,ARM64,Vector128

@@ -241,6 +243,7 @@ implication ,ARM64 ,Vector64 ,AdvSimd
 implication ,ARM64 ,Vector128 ,AdvSimd
 implication ,ARM64 ,VectorT128 ,AdvSimd
 implication ,ARM64 ,Sve ,AdvSimd
+implication ,ARM64 ,Vector ,Sve

 ; ,name and aliases ,archs ,lower baselines included by implication
 ;
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
index 3aaa80673334f4..476cede76faedf 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
@@ -339,5 +339,6 @@ FUNCTIONS
     void recordRelocation(void* location, void* locationRW, void* target, uint16_t fRelocType, int32_t addlDelta)
     uint16_t getRelocTypeHint(void* target)
     uint32_t getExpectedTargetArchitecture()
+    uint32_t getTargetVectorLength()
     uint32_t getJitFlags(CORJIT_FLAGS* flags, uint32_t sizeInBytes)
     CORINFO_METHOD_HANDLE getSpecialCopyHelper(CORINFO_CLASS_HANDLE type) = 0;
diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
index a1a6122037d27a..9b3823ba223fc1 100644
--- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
+++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
@@ -186,6 +186,7 @@ struct JitInterfaceCallbacks
     void (* recordRelocation)(void * thisHandle, CorInfoExceptionClass** ppException, void* location, void* locationRW, void* target, uint16_t fRelocType, int32_t addlDelta);
     uint16_t (* getRelocTypeHint)(void * thisHandle, CorInfoExceptionClass** ppException, void* target);
     uint32_t (* getExpectedTargetArchitecture)(void * thisHandle, CorInfoExceptionClass** ppException);
+    uint32_t (* getTargetVectorLength)(void * thisHandle, CorInfoExceptionClass** ppException);
     uint32_t (* getJitFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORJIT_FLAGS* flags, uint32_t sizeInBytes);
     CORINFO_METHOD_HANDLE (* getSpecialCopyHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE type);

@@ -1915,6 +1916,14 @@ class JitInterfaceWrapper : public ICorJitInfo
         return temp;
     }

+    virtual uint32_t getTargetVectorLength()
+{
+        CorInfoExceptionClass* pException = nullptr;
+        uint32_t temp = _callbacks->getTargetVectorLength(_thisHandle, &pException);
+        if (pException != nullptr) throw pException;
+        return temp;
+}
+
     virtual uint32_t getJitFlags(
         CORJIT_FLAGS* flags,
         uint32_t sizeInBytes)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
index 50e9720aefaa2a..1224afd0869790 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
@@ -124,6 +124,7 @@ LWM(GetReadyToRunHelper, GetReadyToRunHelper_TOKENin, GetReadyToRunHelper_TOKENo
 LWM(GetReadyToRunDelegateCtorHelper, GetReadyToRunDelegateCtorHelper_TOKENIn, Agnostic_CORINFO_LOOKUP)
 LWM(GetRelocTypeHint, DWORDLONG, DWORD)
 LWM(GetExpectedTargetArchitecture, DWORD, DWORD)
+LWM(GetTargetVectorLength, DWORD, DWORD)
 LWM(GetSharedCCtorHelper, DWORDLONG, DWORD)
 LWM(GetStringConfigValue, DWORD, DWORD)
 LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index 0621f2bcd1eb23..a2c7b9bb7b50ab 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -6523,6 +6523,29 @@ WORD MethodContext::repGetRelocTypeHint(void* target)
     return retVal;
 }

+void MethodContext::recGetTargetVectorLength(DWORD result)
+{
+    if (GetTargetVectorLength == nullptr)
+        GetTargetVectorLength = new LightWeightMap<DWORD, DWORD>();
+
+    DWORD key = 0; // There is only ever a single entry to this map
+    GetTargetVectorLength->Add(key, result);
+    DEBUG_REC(dmpGetTargetVectorLength(key, result));
+}
+void MethodContext::dmpGetTargetVectorLength(DWORD key, DWORD result)
+{
+    printf("GetTargetVectorLength key %u, res %u", key, result);
+}
+DWORD MethodContext::repGetTargetVectorLength()
+{
+    DWORD key = 0;
+
+    DWORD value = LookupByKeyOrMiss(GetTargetVectorLength, key, ": key %08X", key);
+
+    DEBUG_REP(dmpGetTargetVectorLength(key, value));
+    return value;
+}
+
 void MethodContext::recGetExpectedTargetArchitecture(DWORD result)
 {
     if (GetExpectedTargetArchitecture == nullptr)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
index 8763439565410e..21d05fac407b86 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
@@ -802,6 +802,10 @@ class MethodContext
     void dmpGetRelocTypeHint(DWORDLONG key, DWORD value);
     WORD repGetRelocTypeHint(void* target);

+    void recGetTargetVectorLength(DWORD result);
+    void dmpGetTargetVectorLength(DWORD key, DWORD result);
+    DWORD repGetTargetVectorLength();
+
     void recGetExpectedTargetArchitecture(DWORD result);
     void dmpGetExpectedTargetArchitecture(DWORD key, DWORD result);
     DWORD repGetExpectedTargetArchitecture();
@@ -1208,6 +1212,7 @@ enum mcPackets
     Packet_GetMethodInstantiationArgument = 227,
     Packet_GetInstantiatedEntry = 228,
     Packet_NotifyInstructionSetUsage = 229,
+    Packet_GetTargetVectorLength = 230,
 };

 void SetDebugDumpVariables();
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
index ed693cdf1b2a9f..0f5dc3641e4784 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
@@ -2019,6 +2019,14 @@ uint16_t interceptor_ICJI::getRelocTypeHint(void* target)
     return result;
 }

+uint32_t interceptor_ICJI::getTargetVectorLength()
+{
+    mc->cr->AddCall("getTargetVectorLength");
+    DWORD result = original_ICorJitInfo->getTargetVectorLength();
+    mc->recGetTargetVectorLength(result);
+    return result;
+}
+
 // For what machine does the VM expect the JIT to generate code? The VM
 // returns one of the IMAGE_FILE_MACHINE_* values. Note that if the VM
 // is cross-compiling (such as the case for crossgen2), it will return a
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
index d14acec9674bb5..a8e8d182f3727e 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
@@ -1433,6 +1433,12 @@ uint32_t interceptor_ICJI::getExpectedTargetArchitecture()
     return original_ICorJitInfo->getExpectedTargetArchitecture();
 }

+uint32_t interceptor_ICJI::getTargetVectorLength()
+{
+    mcs->AddCall("getTargetVectorLength");
+    return original_ICorJitInfo->getTargetVectorLength();
+}
+
 uint32_t interceptor_ICJI::getJitFlags(
     CORJIT_FLAGS* flags,
     uint32_t sizeInBytes)
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
index ee04f7d948bb01..044803fb18149b 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
@@ -1258,6 +1258,11 @@ uint32_t interceptor_ICJI::getExpectedTargetArchitecture()
     return original_ICorJitInfo->getExpectedTargetArchitecture();
 }

+uint32_t interceptor_ICJI::getTargetVectorLength()
+{
+    return original_ICorJitInfo->getTargetVectorLength();
+}
+
 uint32_t interceptor_ICJI::getJitFlags(
     CORJIT_FLAGS* flags,
     uint32_t sizeInBytes)
diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
index 544b8f5d169bd8..817914045d8024 100644
--- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
@@ -1845,6 +1845,13 @@ uint16_t MyICJI::getRelocTypeHint(void* target)
     return result;
 }

+uint32_t MyICJI::getTargetVectorLength()
+{
+    jitInstance->mc->cr->AddCall("getTargetVectorLength");
+    DWORD result = jitInstance->mc->repGetTargetVectorLength();
+    return result;
+}
+
 // For what machine does the VM expect the JIT to generate code? The VM
 // returns one of the IMAGE_FILE_MACHINE_* values. Note that if the VM
 // is cross-compiling (such as the case for crossgen2), it will return a
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index 5d85d98fc4acbc..ea0eb7e4ef88b2 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -11685,6 +11685,21 @@ WORD CEEJitInfo::getRelocTypeHint(void * target)
     return (WORD)-1;
 }

+#ifdef TARGET_ARM64
+extern "C" uint64_t GetSveLengthFromOS();
+#endif
+
+uint32_t CEEJitInfo::getTargetVectorLength()
+{
+    LIMITED_METHOD_CONTRACT;
+
+#ifdef TARGET_ARM64
+    return GetSveLengthFromOS();
+#else
+    UNREACHABLE(); // only called on Arm64
+#endif
+}
+
 uint32_t CEEJitInfo::getExpectedTargetArchitecture()
 {
     LIMITED_METHOD_CONTRACT;
@@ -14362,6 +14377,17 @@ uint32_t CEEInfo::getExpectedTargetArchitecture()
     return IMAGE_FILE_MACHINE_NATIVE;
 }

+uint32_t CEEInfo::getTargetVectorLength()
+{
+    LIMITED_METHOD_CONTRACT;
+
+#ifdef TARGET_ARM64
+    return GetSveLengthFromOS();
+#else
+    UNREACHABLE(); // only called on Arm64
+#endif
+}
+
 void CEEInfo::setBoundaries(CORINFO_METHOD_HANDLE ftn, ULONG32 cMap, ICorDebugInfo::OffsetMapping *pMap)
 {
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index 7f1835e458a53a..dc95f81c4d30d5 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -634,7 +634,7 @@ class CEEJitInfo : public CEEInfo
                           int32_t addlDelta) override final;

     uint16_t getRelocTypeHint(void * target) override final;
-
+    uint32_t getTargetVectorLength() override final;
     uint32_t getExpectedTargetArchitecture() override final;

     void ResetForJitRetry()

From a9367ad44a90aa9072b6176b1146faf418cc4664 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 10 Apr 2025 08:55:47 -0700
Subject: [PATCH 066/120] Use MinVectorLengthForSve()

---
 src/coreclr/jit/abi.cpp              |  4 +-
 src/coreclr/jit/codegenarm64.cpp     |  4 +-
 src/coreclr/jit/compiler.cpp         | 59 ++++++++++++++++++++++------
 src/coreclr/jit/compiler.h           | 38 +++++++++++++++---
 src/coreclr/jit/emitarm64sve.cpp     | 12 +++---
 src/coreclr/jit/gentree.cpp          | 34 ++++++++--------
 src/coreclr/jit/hwintrinsic.cpp      |  7 ++--
 src/coreclr/jit/hwintrinsicarm64.cpp | 14 +++----
 src/coreclr/jit/importer.cpp         |  2 +-
 src/coreclr/jit/importercalls.cpp    |  8 ++--
 src/coreclr/jit/jitconfigvalues.h    |  2 +-
 src/coreclr/vm/jitinterface.cpp      | 46 ++++++++++++++++------
 12 files changed, 158 insertions(+), 72 deletions(-)

diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp
index d5e393959f6234..22ef19cfadb8ca 100644
--- a/src/coreclr/jit/abi.cpp
+++ b/src/coreclr/jit/abi.cpp
@@ -125,10 +125,10 @@ var_types ABIPassingSegment::GetRegisterType() const
             return TYP_SIMD16;
 #ifdef TARGET_ARM64
         case 32:
-            assert(Size == Compiler::compVectorTLength);
+            assert(Compiler::SizeMatchesVectorTLength(Size));
             return TYP_SIMD32;
         case 64:
-            assert(Size == Compiler::compVectorTLength);
+            assert(Compiler::SizeMatchesVectorTLength(Size));
             return TYP_SIMD64;
 #endif // TARGET_ARM64
 #endif // FEATURE_SIMD
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index bdb9af3b957811..089df2890e5055 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -5502,7 +5502,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)
     GenTreeLclVar* lclNode = op1->AsLclVar();
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
     unsigned       varSize = emitTypeSize(varDsc->GetRegisterType(lclNode));
-    assert((varSize == 16) || (varSize == Compiler::compVectorTLength));
+    assert((varSize == 16) || (Compiler::SizeMatchesVectorTLength(varSize)));

     regNumber op1Reg = genConsumeReg(op1);
     assert(op1Reg != REG_NA);
@@ -5573,7 +5573,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
     unsigned       varSize = emitTypeSize(varDsc->GetRegisterType(lclNode));

-    assert((varSize == 16) || (varSize == Compiler::compVectorTLength));
+    assert((varSize == 16) || (Compiler::SizeMatchesVectorTLength(varSize)));

     regNumber srcReg = node->GetRegNum();
     assert((srcReg != REG_NA) || (varTypeIsSIMDVL(node->TypeGet())));
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 7001eda4e4452f..fa6e44f0df6a87 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -490,15 +490,6 @@ Compiler::Compiler(ArenaAllocator* arena,

     info.compHasNextCallRetAddr = false;
     info.compIsVarArgs          = false;
-
-#if defined(TARGET_ARM64)
-    // TODO-VL: This should come from runtime itself and then override with this environment variable
-    Compiler::compVectorTLength = ReinterpretHexAsDecimal(JitConfig.VariableVectorLength());
-    //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength;
-    //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength;
-    //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE;
-    //genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int);
-#endif // TARGET_ARM64
 }

 //------------------------------------------------------------------------
@@ -685,7 +676,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
         return useType;
     }
 #ifdef TARGET_ARM64
-    if (structSize == compVectorTLength)
+    if (SizeMatchesVectorTLength(structSize))
     {
         var_types hfaType = GetHfaType(clsHnd);
         return varTypeIsSIMDVL(hfaType) ? hfaType : TYP_UNKNOWN;
@@ -919,7 +910,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES)
 #ifdef TARGET_ARM64
-        || (varTypeIsSIMDVL(GetHfaType(clsHnd)) && (structSize == compVectorTLength))
+        || (varTypeIsSIMDVL(GetHfaType(clsHnd)) && (SizeMatchesVectorTLength(structSize)))
 #endif
         )
     {
@@ -2161,6 +2152,8 @@ unsigned ReinterpretHexAsDecimal(unsigned in)

 #ifdef TARGET_ARM64
 unsigned Compiler::compVectorTLength = 0;
+unsigned Compiler::compMinVectorTLengthForSve = 0;
+bool     Compiler::compUseSveForVectorT = false;
 #endif

 void Compiler::compInitOptions(JitFlags* jitFlags)
@@ -2603,6 +2596,50 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
 #endif // DEBUG
     }

+
+#if defined(TARGET_ARM64)
+
+    compMinVectorTLengthForSve = ReinterpretHexAsDecimal(JitConfig.MinVectorLengthForSve());
+    bool isInvalidMinVectorTLength = false;
+    // Should be at least 16B or 128b
+    isInvalidMinVectorTLength |= (compMinVectorTLengthForSve < 16);
+    // Should be at most 256B or 2048b
+    isInvalidMinVectorTLength |= (compMinVectorTLengthForSve > 256);
+    // Should be power of 2
+    isInvalidMinVectorTLength |= ((compMinVectorTLengthForSve & (compMinVectorTLengthForSve - 1)) != 0);
+
+    if (isInvalidMinVectorTLength)
+    {
+        // In that case, default it to 32B.
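+        // (32B corresponds to 256-bit vectors; this is a convenient default for
+        // testing rather than a value derived from the hardware.)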
+        compMinVectorTLengthForSve = 32;
+    }
+
+    if (info.compMatchedVM)
+    {
+        compVectorTLength = info.compCompHnd->getTargetVectorLength();
+        CORINFO_InstructionSetFlags instructionSetFlags = jitFlags->GetInstructionSetFlags();
+
+        if (!instructionSetFlags.HasInstructionSet(InstructionSet_Sve) && !instructionSetFlags.HasInstructionSet(InstructionSet_Sve_Arm64))
+        {
+            compMinVectorTLengthForSve = UINT_MAX;
+        }
+    }
+    else
+    {
+        // For altjit, just use the default 16B
+        // To use SVE:  Set DOTNET_SimulatedVLForSve >= DOTNET_MinVectorLengthForSve
+        // To use NEON: Set DOTNET_SimulatedVLForSve <  DOTNET_MinVectorLengthForSve
+        compVectorTLength = 16;
+    }
+
+    compUseSveForVectorT = (compVectorTLength >= compMinVectorTLengthForSve);
+
+    //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength;
+    //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength;
+    //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE;
+    //genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int);
+#endif // TARGET_ARM64
+
     if (compIsForInlining())
     {
         return;
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 06a3c9b6c70d67..3941c53dc5e4d4 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2610,10 +2610,6 @@ class Compiler
     EHNodeDsc* ehnTree; // root of the tree comprising the EHnodes.
     EHNodeDsc* ehnNext; // root of the tree comprising the EHnodes.

-#if defined(TARGET_ARM64)
-    static unsigned compVectorTLength;
-#endif
-
     struct EHNodeDsc
     {
         enum EHBlockType
@@ -8907,6 +8903,34 @@ class Compiler
     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     */

+#if defined(TARGET_ARM64)
+private:
+
+    static unsigned compVectorTLength;
+    static unsigned compMinVectorTLengthForSve;
+    static bool     compUseSveForVectorT;
+
+public:
+    FORCEINLINE static unsigned GetVectorTLength()
+    {
+        return compVectorTLength;
+    }
+    FORCEINLINE static bool UseSveForVectorT()
+    {
+        return compUseSveForVectorT;
+    }
+    FORCEINLINE static bool UseSveForSimdSize(unsigned simdSize)
+    {
+        return compUseSveForVectorT && (simdSize >= compMinVectorTLengthForSve);
+    }
+    FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize)
+    {
+        return simdSize == compVectorTLength;
+    }
+#endif
+
+public:
+
     bool IsBaselineSimdIsaSupported()
     {
 #ifdef FEATURE_SIMD
@@ -9192,7 +9216,7 @@ class Compiler
 #elif defined(TARGET_ARM64)
         if (compExactlyDependsOn(InstructionSet_Sve_Arm64))
         {
-            return Compiler::compVectorTLength;
+            return GetVectorTLength();
         }
         else if (compExactlyDependsOn(InstructionSet_VectorT128))
         {
@@ -9617,7 +9641,11 @@ class Compiler
     bool structSizeMightRepresentSIMDType(size_t structSize)
     {
 #ifdef FEATURE_SIMD
+#if defined(TARGET_ARM64)
         return (structSize >= getMinVectorByteLength()) && (structSize <= getVectorTByteLength());
+#else
+        return (structSize >= getMinVectorByteLength()) && (structSize <= getMaxVectorByteLength());
+#endif // TARGET_ARM64
 #else
         return false;
 #endif // FEATURE_SIMD
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index 06a3d709387da5..3e9c2b594a4420 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -2672,13 +2672,13 @@ void emitter::emitInsSve_R_R_I(instruction ins,
             // Since SVE uses "mul vl", we need to make sure that we calculate
             // the offset correctly.
- if (Compiler::compVectorTLength > 16) // TODO-VL: Convert it into helper method + if (Compiler::UseSveForVectorT()) { - if ((imm % Compiler::compVectorTLength) == 0) + if ((imm % Compiler::GetVectorTLength()) == 0) { // If imm is a multiple of Compiler::compVectorTLength, // we can use the `[#imm mul vl]` - imm = imm / Compiler::compVectorTLength; + imm = imm / Compiler::GetVectorTLength(); } else { @@ -2712,13 +2712,13 @@ void emitter::emitInsSve_R_R_I(instruction ins, // Since SVE uses "mul vl", we need to make sure that we calculate // the offset correctly. - if (Compiler::compVectorTLength > 16) // TODO-VL: Convert it into helper method + if (Compiler::UseSveForVectorT()) { - if ((imm % Compiler::compVectorTLength) == 0) + if ((imm % Compiler::GetVectorTLength()) == 0) { // If imm is a multiple of Compiler::compVectorTLength, // we can use the `[#imm mul vl]` - imm = imm / Compiler::compVectorTLength; + imm = imm / Compiler::GetVectorTLength(); } else { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 991a86e3c541da..fe227904ed4bad 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20969,7 +20969,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( op2 = gtNewOperNode(GT_NEG, TYP_INT, op2); } - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); } @@ -21941,7 +21941,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, unreached(); } #elif defined(TARGET_ARM64) - assert((simdSize == 8) || (simdSize == 16) || (simdSize == compVectorTLength)); + assert((simdSize == 8) || (simdSize == 16) || (SizeMatchesVectorTLength(simdSize))); switch (simdSourceBaseJitType) { @@ -22442,7 +22442,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else { - assert(simdSize > 16); + assert(UseSveForSimdSize(simdSize)); intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = @@ -22490,7 +22490,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( { intrinsic = NI_Vector128_op_Equality; } - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { intrinsic = NI_Vector_op_Equality; @@ -22652,7 +22652,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); @@ -22701,7 +22701,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } else { - assert(simdSize > 16); + assert(UseSveForSimdSize(simdSize)); intrinsic = NI_Vector_op_Inequality; @@ -22778,7 +22778,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( } return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { intrinsic = NI_Sve_ConditionalSelect; op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); @@ -22788,7 +22788,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( intrinsic = NI_AdvSimd_BitwiseSelect; } - intrinsic = (simdSize > 16) ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; + intrinsic = UseSveForSimdSize(simdSize) ? 
NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #else #error Unsupported platform @@ -26365,7 +26365,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Sve_AddAcross, simdBaseJitType, simdSize); return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 16); @@ -26927,7 +26927,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo } else { - assert((simdSize == 8) || (simdSize == compVectorTLength)); + assert((simdSize == 8) || (SizeMatchesVectorTLength(simdSize))); tmp1 = op1; } @@ -27143,7 +27143,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) - if ((simdSize == 16) || (simdSize == compVectorTLength)) + if ((simdSize == 16) || (SizeMatchesVectorTLength(simdSize))) { if (varTypeIsFloating(simdBaseType)) { @@ -28971,7 +28971,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N #ifdef TARGET_ARM64 //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places - if (simdSize > 16) + if (Compiler::UseSveForSimdSize(simdSize)) { switch (id) { @@ -29144,7 +29144,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( assert(!isScalar || (simdSize == 8)); assert(!isScalar || varTypeIsFloating(simdBaseType)); assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert((simdSize <= 16) || (simdSize == Compiler::compVectorTLength)); + assert((simdSize <= 16) || (Compiler::SizeMatchesVectorTLength(simdSize))); #else if (simdSize == 64) { @@ -29252,7 +29252,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(!isScalar || (simdSize == 8)); assert(!isScalar || varTypeIsFloating(simdBaseType)); assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert((simdSize <= 16) || (simdSize == Compiler::compVectorTLength)); + assert((simdSize <= 16) || (Compiler::SizeMatchesVectorTLength(simdSize))); #else if (simdSize == 64) { @@ -30061,7 +30061,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { bool validSimdSize = (simdSize == 8) || (simdSize == 12) || (simdSize == 16); #if defined(TARGET_ARM64) - validSimdSize |= (simdSize == Compiler::compVectorTLength); + validSimdSize |= (Compiler::SizeMatchesVectorTLength(simdSize)); #endif assert(validSimdSize); @@ -30363,7 +30363,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, id = isScalar ? 
NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; } #elif defined(TARGET_ARM64) - if (simdSize > 16) + if (Compiler::UseSveForSimdSize(simdSize)) { id = NI_Sve_CompareNotEqualTo; } @@ -30458,7 +30458,7 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp( case GT_GT: case GT_LT: { - if (simdSize > 16) + if (Compiler::UseSveForSimdSize(simdSize)) { lookupType = TYP_MASK; } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 219dd0aef3f19c..1566f037d38f84 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -850,6 +850,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_Sha1, LAST_NI_Sha1 }, { FIRST_NI_Sha256, LAST_NI_Sha256 }, { NI_Illegal, NI_Illegal }, // Atomics + { FIRST_NI_Vector, LAST_NI_Vector }, { FIRST_NI_Vector64, LAST_NI_Vector64 }, { FIRST_NI_Vector128, LAST_NI_Vector128 }, { NI_Illegal, NI_Illegal }, // Dczva @@ -866,7 +867,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // Sha1_Arm64 { NI_Illegal, NI_Illegal }, // Sha256_Arm64 { NI_Illegal, NI_Illegal }, // Sve_Arm64 - { FIRST_NI_Vector, LAST_NI_Vector }, #else #error Unsupported platform #endif @@ -1235,7 +1235,8 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI #if defined(TARGET_ARM64) else if ((FIRST_NI_Vector <= id) && (id <= LAST_NI_Vector)) { - return Compiler::compVectorTLength; + assert(Compiler::UseSveForVectorT()); + return Compiler::GetVectorTLength(); } #endif @@ -2012,7 +2013,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - if ((simdSize != 8) && (simdSize != 16) && (simdSize != compVectorTLength)) + if ((simdSize != 8) && (simdSize != 16) && (!SizeMatchesVectorTLength(simdSize))) #elif defined(TARGET_XARCH) if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64)) #endif // TARGET_* diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9fb26c0bba4aeb..ef12edefe791ad 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1638,19 +1638,17 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_CreateSequence: { - //TODO-VL: Check if similar check is needed at other places in this methods. 
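//------------------------------------------------------------------------
// Aside (sketch, not in the patch itself): the NI_Sve_Index node this hunk
// creates just below maps Vector<T>.CreateSequence(start, step) onto the SVE
// INDEX instruction, which, for integer element types, writes
// start + lane * step into every lane. A scalar model of the result, for
// laneCount = GetVectorTLength() / sizeof(T):
template <typename T>
static void modelCreateSequence(T* lanes, unsigned laneCount, T start, T step)
{
    for (unsigned i = 0; i < laneCount; i++)
    {
        lanes[i] = (T)(start + (T)i * step); // lane i of `index zd, start, step`
    }
}
//------------------------------------------------------------------------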
- if (simdSize > 16) - { - op2 = impPopStack().val; - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); - } + assert(Compiler::UseSveForSimdSize(simdSize)); + + op2 = impPopStack().val; + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); break; } case NI_Vector_ToScalar: { - if (simdSize > 16) + if (UseSveForSimdSize(simdSize)) { op1 = impSIMDPopStack(); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index bd695edbf5d453..e102012ae1c2ab 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -6805,7 +6805,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (genActualType(lclTyp) == TYP_I_IMPL) { impBashVarAddrsToI(op1); - } + } // If this is a local and the local is a ref type, see // if we can improve type information based on the diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 1e697a43f1352a..638e125aaf370f 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -10653,14 +10653,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) uint32_t size = getVectorTByteLength(); assert((size == 16) || (size == 32) || (size == 64)); - bool useAgnosticVL = false; + bool useSizeAgnosticVector = false; #ifdef TARGET_ARM64 - useAgnosticVL = compExactlyDependsOn(InstructionSet_Sve_Arm64) && (size > 16); + useSizeAgnosticVector = UseSveForVectorT(); #endif const char* lookupClassName = className; - if (!useAgnosticVL) + if (!useSizeAgnosticVector) { switch (size) { @@ -10691,7 +10691,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) const char* lookupMethodName = methodName; - if (!useAgnosticVL && ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0'))) + if (!useSizeAgnosticVector && ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0'))) { if (strncmp(methodName + 2, "Vector", 6) == 0) { diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d8585f95dba254..0e54ce81060042 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -383,7 +383,7 @@ CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. 
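//------------------------------------------------------------------------
// Aside (sketch, not in the patch itself): the lookupNamedIntrinsic change
// above keeps the open generic name "Vector`1" whenever Vector<T> is treated
// as size-agnostic, so it resolves into the scalable NI_Vector_* range; only
// the fixed-size path still rewrites the class name. Roughly:
static const char* modelVectorTLookupName(bool useSizeAgnosticVector, unsigned sizeInBytes)
{
    if (useSizeAgnosticVector)
    {
        return "Vector`1"; // handled by the VL-based InstructionSet_Vector range
    }
    switch (sizeInBytes)
    {
        case 16:
            return "Vector128";
        case 32:
            return "Vector256";
        case 64:
            return "Vector512";
        default:
            return nullptr; // unexpected Vector<T> size
    }
}
//------------------------------------------------------------------------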
#if defined(TARGET_ARM64) -RELEASE_CONFIG_INTEGER(VariableVectorLength, "VariableVectorLength", 0x10) // The preferred decimal bytes for VL +RELEASE_CONFIG_INTEGER(MinVectorLengthForSve, "MinVectorLengthForSve", 0x20) // The preferred decimal bytes for VL #endif // diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index ea0eb7e4ef88b2..3146f8f523fa96 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11693,11 +11693,23 @@ uint32_t CEEJitInfo::getTargetVectorLength() { LIMITED_METHOD_CONTRACT; -#ifdef TARGET_ARM64 - return GetSveLengthFromOS(); -#else - UNREACHABLE(); // only called on Arm64 -#endif + #ifdef TARGET_ARM64 + CORJIT_FLAGS corjitFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); + if (corjitFlags.IsSet(InstructionSet_Sve) || corjitFlags.IsSet(InstructionSet_Sve_Arm64)) + { + return GetSveLengthFromOS(); + } + else if (corjitFlags.IsSet(InstructionSet_AdvSimd) || corjitFlags.IsSet(InstructionSet_AdvSimd_Arm64)) + { + return 16; + } + else + { + return 0; + } + #else + UNREACHABLE(); // only called on Arm64 + #endif } uint32_t CEEJitInfo::getExpectedTargetArchitecture() @@ -14379,13 +14391,23 @@ uint32_t CEEInfo::getExpectedTargetArchitecture() uint32_t CEEInfo::getTargetVectorLength() { - LIMITED_METHOD_CONTRACT; - -#ifdef TARGET_ARM64 - return GetSveLengthFromOS(); -#else - UNREACHABLE(); // only called on Arm64 -#endif + #ifdef TARGET_ARM64 + CORJIT_FLAGS corjitFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); + if (corjitFlags.IsSet(InstructionSet_Sve) || corjitFlags.IsSet(InstructionSet_Sve_Arm64)) + { + return GetSveLengthFromOS(); + } + else if (corjitFlags.IsSet(InstructionSet_AdvSimd) || corjitFlags.IsSet(InstructionSet_AdvSimd_Arm64)) + { + return 16; + } + else + { + return 0; + } + #else + UNREACHABLE(); // only called on Arm64 + #endif } void CEEInfo::setBoundaries(CORINFO_METHOD_HANDLE ftn, ULONG32 cMap, From 9d9b20b9ef515ae510cd2d5aade65700b9f4cd1b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 10 Apr 2025 23:25:59 -0700 Subject: [PATCH 067/120] Fix correct type in LSRA --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d3024af276bf9f..df59ac126a1eea 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -7420,7 +7420,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, // while on x86 we can spill directly to memory. 
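//------------------------------------------------------------------------
// Aside (sketch, not in the patch itself): a scalable Vector<T> value has no
// fixed-width "upper half" that could be parked in a second register, which
// is why the variable-VL case below is forced to spill to memory and why
// spillReg may legitimately be REG_NA for it:
static bool modelSpillUpperVectorToMemory(bool spillAfter, bool isVariableVL)
{
    // NEON: save the upper half to a register unless the whole value is
    // being spilled afterwards anyway; scalable VL: always use the stack.
    return spillAfter || isVariableVL;
}
//------------------------------------------------------------------------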
regNumber spillReg = refPosition->assignedReg(); #ifdef TARGET_ARM64 - bool isVariableVL = varTypeIsSIMDVL(tree->TypeGet()); + bool isVariableVL = varTypeIsSIMDVL(varDsc->TypeGet()); bool spillToMem = refPosition->spillAfter || isVariableVL; assert((spillReg != REG_NA) || isVariableVL); #else From 8d8ba75850a38d86c4d1759e6bb1c917e1382a92 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 11 Apr 2025 17:03:58 -0700 Subject: [PATCH 068/120] Introduce for now FakeVectorLength environment variable --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/jitconfigvalues.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fa6e44f0df6a87..ee2017ea5b3f21 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2629,7 +2629,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // For altjit, just use the default 16B // To use SVE: Set DOTNET_SimulatedVLForSve >= DOTNET_MinVectorLengthForSve // To use NEON: Set DOTNET_SimulatedVLForSve < DOTNET_MinVectorLengthForSve - compVectorTLength = 16; + compVectorTLength = ReinterpretHexAsDecimal(JitConfig.FakeVectorLengthForSve()); } compUseSveForVectorT = (compVectorTLength >= compMinVectorTLengthForSve); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 0e54ce81060042..32985b8b74d3d3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -384,6 +384,7 @@ CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, "PreferredVectorBitWidth", 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. 
#if defined(TARGET_ARM64) RELEASE_CONFIG_INTEGER(MinVectorLengthForSve, "MinVectorLengthForSve", 0x20) // The preferred decimal bytes for VL +CONFIG_INTEGER(FakeVectorLengthForSve, "FakeVectorLengthForSve", 0x10) // The fake decimal bytes for VL (for testing purposes) #endif // From 41c7629779820c5c3e29c78ea11c41eedb8e7c64 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 11 Apr 2025 17:08:04 -0700 Subject: [PATCH 069/120] Convert all checks to use varTypeIsSIMDVL() --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/instr.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 2 +- src/coreclr/jit/regset.cpp | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 089df2890e5055..d4cb90b41ce7a8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2516,7 +2516,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } else { - if (tree->TypeGet() == TYP_SIMD32) + if (varTypeIsSIMDVL(tree->TypeGet())) { simd32_t val = vecCon->gtSimd32Val; if (ElementsAreSame(val.i8, 32)) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ee2017ea5b3f21..7bb097323558e1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -910,7 +910,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES) #ifdef TARGET_ARM64 - || (varTypeIsSIMDVL(GetHfaType(clsHnd)) && (SizeMatchesVectorTLength(structSize))) + || SizeMatchesVectorTLength(structSize) #endif ) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3941c53dc5e4d4..9fa6ff411197f6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8225,7 +8225,7 @@ class Compiler assert(type != TYP_STRUCT); // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. 
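//------------------------------------------------------------------------
// Aside (background, assuming the standard AAPCS64 rules): across a call
// only the low 8 bytes of v8-v15 are preserved, so any SIMD value wider
// than 8 bytes held in a callee-saved register needs a caller-side upper
// save/restore. The predicate the hunk below generalizes to scalable types
// can be modeled as:
static bool modelNeedsPartialCalleeSave(unsigned simdByteSize)
{
    // TYP_SIMD8 fits in the preserved low half; TYP_SIMD12/TYP_SIMD16 and
    // the VL-sized Vector<T> types do not.
    return simdByteSize > 8;
}
//------------------------------------------------------------------------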
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (type == TYP_SIMD32) || (type == TYP_SIMD64)); + return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (varTypeIsSIMDVL(type))); } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE") diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e102012ae1c2ab..ede89612f3c7d4 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3877,7 +3877,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI else #endif // TARGET_XARCH #ifdef TARGET_ARM64 - if ((simdType == TYP_SIMD32) || (simdType == TYP_SIMD64)) + if (varTypeIsSIMDVL(simdType)) { hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_Sve); } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 26d60f0b142657..90b23cdbb4a790 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2252,7 +2252,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #endif } #ifdef TARGET_ARM64 - else if ((dstType == TYP_SIMD32) || (dstType == TYP_SIMD64)) + else if (varTypeIsSIMDVL(dstType)) { return INS_sve_str; } diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index d2f692ed9e6acf..69eca6e9029ba7 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2070,7 +2070,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert(simdType == TYP_SIMD32); + assert(varTypeIsSIMDVL(simdType)); assert((intrinsicId == NI_Vector_op_Equality) || (intrinsicId == NI_Vector_op_Inequality)); diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 4a750301fa39b9..1239e8cacbfd04 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -606,7 +606,7 @@ var_types RegSet::tmpNormalizeType(var_types type) } #if defined(TARGET_ARM64) - if (type == TYP_SIMD32) + if (varTypeIsSIMDVL(type)) { //TODO-VL: temporary work around to allow scalable registers type = TYP_SIMD16; @@ -693,7 +693,7 @@ void RegSet::tmpPreAllocateTemps(var_types type, unsigned count) unsigned size = genTypeSize(type); #ifdef TARGET_ARM64 - if (type == TYP_SIMD32) + if (varTypeIsSIMDVL(type)) { size = 16; // SIMD registers overlap with SVE registers } From c03bb1c7c6d4ed80e49d9f62dee147719ed858fb Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 May 2025 20:47:51 -0700 Subject: [PATCH 070/120] wip --- src/coreclr/jit/codegenarm64.cpp | 6 ++---- src/coreclr/jit/emit.cpp | 4 +++- src/coreclr/jit/emit.h | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 9f4870e7d76b76..97209978c65a04 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2365,9 +2365,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Get a temp integer register to compute long address. 
regNumber addrReg = internalRegisters.GetSingle(tree); CORINFO_FIELD_HANDLE hnd; - simd32_t constValue; - memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd32_t)); - hnd = emit->emitSimd32Const(constValue); + hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); //emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0); //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B); @@ -2399,7 +2397,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre CORINFO_FIELD_HANDLE hnd; simd64_t constValue; memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); - hnd = emit->emitSimd64Const(constValue); + hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); } } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index d5f9445e3618c4..2cbc9a027c1bc9 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8227,7 +8227,9 @@ CORINFO_FIELD_HANDLE emitter::emitSimdConst(simd_t* constValue, emitAttr attr) UNATIVE_OFFSET cnum = emitDataConst(constValue, cnsSize, cnsAlign, dataType); return emitComp->eeFindJitDataOffs(cnum); } +#endif // TARGET_XARCH || TARGET_ARM64 +#if defined(TARGET_XARCH) //------------------------------------------------------------------------ // emitSimdConstCompressedLoad: Create a simd data section constant, // compressing it if possible, and emit an appropriate instruction @@ -8331,7 +8333,7 @@ void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, reg CORINFO_FIELD_HANDLE hnd = emitSimdConst(constValue, attr); emitIns_R_C(ins, attr, targetReg, hnd, 0); } -#endif // TARGET_XARCH || TARGET_ARM64 +#endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) CORINFO_FIELD_HANDLE emitter::emitSimdMaskConst(simdmask_t constValue) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 619638f502a42b..4f1d26ba10218a 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2659,6 +2659,8 @@ class emitter CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue); #if defined(TARGET_XARCH) || defined(TARGET_ARM64) CORINFO_FIELD_HANDLE emitSimdConst(simd_t* constValue, emitAttr attr); +#endif // TARGET_XARCH || TARGET_ARM64 +#if defined(TARGET_XARCH) void emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) From df8c7abacb2a56c0d55a84a7aa71f7653aaceb4b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 May 2025 20:51:55 -0700 Subject: [PATCH 071/120] gen.bat update --- src/coreclr/inc/corinfoinstructionset.h | 21 ++++++++------- .../JitInterface/CorInfoImpl_generated.cs | 7 ++--- .../JitInterface/CorInfoInstructionSet.cs | 27 +++++++++++-------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 4d9be9554976dd..b303c719bd9868 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -33,16 +33,17 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=15, InstructionSet_Rcpc2=16, InstructionSet_Sve=17, - InstructionSet_ArmBase_Arm64=18, - InstructionSet_AdvSimd_Arm64=19, - InstructionSet_Aes_Arm64=20, - InstructionSet_Crc32_Arm64=21, - InstructionSet_Dp_Arm64=22, - InstructionSet_Rdm_Arm64=23, -
InstructionSet_Sha1_Arm64=24, - InstructionSet_Sha256_Arm64=25, - InstructionSet_Sve_Arm64=26, - InstructionSet_Sve2_Arm64=27, + InstructionSet_Sve2=18, + InstructionSet_ArmBase_Arm64=19, + InstructionSet_AdvSimd_Arm64=20, + InstructionSet_Aes_Arm64=21, + InstructionSet_Crc32_Arm64=22, + InstructionSet_Dp_Arm64=23, + InstructionSet_Rdm_Arm64=24, + InstructionSet_Sha1_Arm64=25, + InstructionSet_Sha256_Arm64=26, + InstructionSet_Sve_Arm64=27, + InstructionSet_Sve2_Arm64=28, #endif // TARGET_ARM64 #ifdef TARGET_RISCV64 InstructionSet_RiscV64Base=1, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index 05fc12819613ad..ec976a01e67422 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -2652,7 +2652,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 178); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 179); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage; @@ -2830,8 +2830,9 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[173] = (delegate* unmanaged)&_recordRelocation; callbacks[174] = (delegate* unmanaged)&_getRelocTypeHint; callbacks[175] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[176] = (delegate* unmanaged)&_getJitFlags; - callbacks[177] = (delegate* unmanaged)&_getSpecialCopyHelper; + callbacks[176] = (delegate* unmanaged)&_getTargetVectorLength; + callbacks[177] = (delegate* unmanaged)&_getJitFlags; + callbacks[178] = (delegate* unmanaged)&_getSpecialCopyHelper; return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 4fa97fadc19674..b3865423d89f8a 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -216,17 +216,18 @@ public enum InstructionSet_ARM64 Rcpc = 14, VectorT128 = 15, Rcpc2 = 16, - Sve2 = 17, - ArmBase_Arm64 = 18, - AdvSimd_Arm64 = 19, - Aes_Arm64 = 20, - Crc32_Arm64 = 21, - Dp_Arm64 = 22, - Rdm_Arm64 = 23, - Sha1_Arm64 = 24, - Sha256_Arm64 = 25, - Sve_Arm64 = 26, - Sve2_Arm64 = 27, + Sve = 17, + Sve2 = 18, + ArmBase_Arm64 = 19, + AdvSimd_Arm64 = 20, + Aes_Arm64 = 21, + Crc32_Arm64 = 22, + Dp_Arm64 = 23, + Rdm_Arm64 = 24, + Sha1_Arm64 = 25, + Sha256_Arm64 = 26, + Sve_Arm64 = 27, + Sve2_Arm64 = 28, } public enum InstructionSet_RiscV64 @@ -621,6 +622,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve2)) resultflags.AddInstructionSet(InstructionSet.ARM64_Sve); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector)) + resultflags.AddInstructionSet(InstructionSet.ARM64_Sve); break; case TargetArchitecture.RiscV64: @@ -1065,6 +1068,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.ARM64_Sve); if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve)) resultflags.AddInstructionSet(InstructionSet.ARM64_Sve2); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_Sve)) + 
resultflags.AddInstructionSet(InstructionSet.ARM64_Vector); break; case TargetArchitecture.RiscV64: From 8ee5339e8271aa3471e8316ba51670f14c12fd35 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 May 2025 11:32:40 -0700 Subject: [PATCH 072/120] Refactor to UseSveFor*() --- src/coreclr/jit/codegenarm64.cpp | 118 +++++++++++++++------------ src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/compiler.cpp | 36 +++----- src/coreclr/jit/compiler.h | 13 ++- src/coreclr/jit/gentree.cpp | 22 ++--- src/coreclr/jit/hwintrinsicarm64.cpp | 4 +- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/instr.cpp | 8 +- src/coreclr/jit/jitconfigvalues.h | 3 +- src/coreclr/jit/lowerarmarch.cpp | 2 +- src/coreclr/jit/lsra.cpp | 8 +- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/regset.cpp | 4 +- src/coreclr/jit/vartype.h | 13 --- 14 files changed, 113 insertions(+), 126 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 97209978c65a04..21792d64a69571 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2330,6 +2330,45 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre break; } case TYP_SIMD32: + { + // Use scalable registers + if (vecCon->IsAllBitsSet()) + { + // Use Scalable_B because for Ones, it doesn't matter. + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, -1, INS_OPTS_SCALABLE_B); + } + else if (vecCon->IsZero()) + { + // Use Scalable_B because for Zero, it doesn't matter. + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, 0, INS_OPTS_SCALABLE_B); + } + else + { + simd32_t val = vecCon->gtSimd32Val; + if (ElementsAreSame(val.i8, 32)) + { + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B); + } + else if (ElementsAreSame(val.i16, 16)) + { + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H); + } + else if (ElementsAreSame(val.i32, 8)) + { + emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S); + } + else + { + // Get a temp integer register to compute long address. + regNumber addrReg = internalRegisters.GetSingle(tree); + CORINFO_FIELD_HANDLE hnd; + hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); + emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); + //emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0); + //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B); + } + } + } case TYP_SIMD64: { // Use scalable registers @@ -2345,61 +2384,32 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } else { - if (varTypeIsSIMDVL(tree->TypeGet())) + simd64_t val = vecCon->gtSimd64Val; + if (ElementsAreSame(val.i32, 16) && emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) { - simd32_t val = vecCon->gtSimd32Val; - if (ElementsAreSame(val.i8, 32)) - { - emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B); - } - else if (ElementsAreSame(val.i16, 16)) - { - emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H); - } - else if (ElementsAreSame(val.i32, 8)) - { - emit->emitIns_R_I(INS_sve_dup, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S); - } - else - { - // Get a temp integer register to compute long address. 
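//------------------------------------------------------------------------
// Aside (assumed semantics of the ElementsAreSame checks above): a vector
// constant can be materialized with a single `sve_dup` broadcast only when
// every element is identical; anything else falls through to a data-section
// constant loaded via `sve_ldr`, as the lines below arrange.
template <typename T, unsigned N>
static bool modelElementsAreSame(const T (&elements)[N])
{
    for (unsigned i = 1; i < N; i++)
    {
        if (elements[i] != elements[0])
        {
            return false; // mixed elements: needs a data-section constant
        }
    }
    return true; // e.g. {7, 7, 7, ...} can become `dup z0.s, #7`
}
//------------------------------------------------------------------------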
- regNumber addrReg = internalRegisters.GetSingle(tree); - CORINFO_FIELD_HANDLE hnd; - hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); - emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); - //emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0); - //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B); - } + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i16, 32) && + emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_IMM_BITMASK); + } + else if (ElementsAreSame(val.i8, 64) && emitter::isValidSimm<8>(val.i8[0])) + { + emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_IMM_BITMASK); } else { - simd64_t val = vecCon->gtSimd64Val; - if (ElementsAreSame(val.i32, 16) && emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) - { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_IMM_BITMASK); - } - else if (ElementsAreSame(val.i16, 32) && - emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) - { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_IMM_BITMASK); - } - else if (ElementsAreSame(val.i8, 64) && emitter::isValidSimm<8>(val.i8[0])) - { - emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_IMM_BITMASK); - } - else - { - // Get a temp integer register to compute long address. - regNumber addrReg = internalRegisters.GetSingle(tree); - CORINFO_FIELD_HANDLE hnd; - simd64_t constValue; - memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); - hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); - emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); - } + // Get a temp integer register to compute long address. + regNumber addrReg = internalRegisters.GetSingle(tree); + CORINFO_FIELD_HANDLE hnd; + simd64_t constValue; + memcpy(&constValue, &vecCon->gtSimdVal, sizeof(simd64_t)); + hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); + emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); } } break; @@ -5342,7 +5352,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node) regNumber tgtReg = node->GetRegNum(); #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. - if (varTypeIsSIMDVL(op1->TypeGet())) + if (Compiler::UseSveForVectorT()) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the @@ -5408,7 +5418,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) assert((varSize == 16) || (Compiler::SizeMatchesVectorTLength(varSize))); regNumber srcReg = node->GetRegNum(); - assert((srcReg != REG_NA) || (varTypeIsSIMDVL(node->TypeGet()))); + assert((srcReg != REG_NA) || (Compiler::UseSveForType(node->TypeGet()))); regNumber lclVarReg = genConsumeReg(lclNode); assert(lclVarReg != REG_NA); @@ -5422,7 +5432,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. 
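//------------------------------------------------------------------------
// Aside (prototype rationale; modelUpperSaveSlotSize is a hypothetical
// helper, not in the patch): until there is a custom ABI for SVE, the
// prototype spills the entire Z register rather than just an upper half,
// so the save slot must cover the full runtime vector length:
static unsigned modelUpperSaveSlotSize(bool isScalableValue)
{
    // NEON: only the high 8 bytes of the 16-byte register need a slot;
    // SVE: reserve GetVectorTLength() bytes for the whole Z register.
    return isScalableValue ? Compiler::GetVectorTLength() : 8;
}
//------------------------------------------------------------------------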
- if (varTypeIsSIMDVL(op1->TypeGet())) + if (Compiler::UseSveForVectorT()) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index dacd8c68774521..44b48b0d6fd6bd 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3226,7 +3226,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) instruction ins = ins_Copy(node->reg, copyType); #ifdef TARGET_ARM64 - insOpts opts = varTypeIsSIMDVL(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; + insOpts opts = Compiler::UseSveForType(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg, /* canSkip */ false, opts); #else @@ -3251,7 +3251,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) regNumber sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg; instruction ins = ins_Copy(sourceReg, genActualType(edge->type)); #ifdef TARGET_ARM64 - insOpts opts = varTypeIsSIMDVL(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; + insOpts opts = Compiler::UseSveForType(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE; GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg, /* canSkip */ true, opts); #else diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ebacf26fe3835c..f845ddddf055b5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -680,7 +680,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS if (SizeMatchesVectorTLength(structSize)) { var_types hfaType = GetHfaType(clsHnd); - return varTypeIsSIMDVL(hfaType) ? hfaType : TYP_UNKNOWN; + return UseSveForType(hfaType) ? hfaType : TYP_UNKNOWN; } #endif } @@ -2162,7 +2162,7 @@ unsigned ReinterpretHexAsDecimal(unsigned in) #ifdef TARGET_ARM64 unsigned Compiler::compVectorTLength = 0; -unsigned Compiler::compMinVectorTLengthForSve = 0; +//unsigned Compiler::compMinVectorTLengthForSve = 0; bool Compiler::compUseSveForVectorT = false; #endif @@ -2608,22 +2608,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #if defined(TARGET_ARM64) - - compMinVectorTLengthForSve = ReinterpretHexAsDecimal(JitConfig.MinVectorLengthForSve()); - bool isInvalidMinVectorTLength = false; - // Should be at least 16B or 128b - isInvalidMinVectorTLength |= (compMinVectorTLengthForSve < 16); - // Should be at most 256B or 2048b - isInvalidMinVectorTLength |= (compMinVectorTLengthForSve > 256); - // Should be power of 2 - isInvalidMinVectorTLength |= ((compMinVectorTLengthForSve & (compMinVectorTLengthForSve - 1)) != 0); - - if (isInvalidMinVectorTLength) - { - // In that case, default it to 32B. 
- compMinVectorTLengthForSve = 32; - } - if (info.compMatchedVM) { compVectorTLength = info.compCompHnd->getTargetVectorLength(); @@ -2631,19 +2615,21 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (!instructionSetFlags.HasInstructionSet(InstructionSet_Sve) && !instructionSetFlags.HasInstructionSet(InstructionSet_Sve_Arm64)) { - compMinVectorTLengthForSve = UINT_MAX; + compVectorTLength = UINT_MAX; + compUseSveForVectorT = false; + } + else + { + compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); } } else { - // For altjit, just use the default 16B - // To use SVE: Set DOTNET_SimulatedVLForSve >= DOTNET_MinVectorLengthForSve - // To use NEON: Set DOTNET_SimulatedVLForSve < DOTNET_MinVectorLengthForSve - compVectorTLength = ReinterpretHexAsDecimal(JitConfig.FakeVectorLengthForSve()); + // For altjit, use the 32B if we want to test SVE for VectorT, otherwise 16B + compUseSveForVectorT = JitConfig.UseSveForVectorT(); + compVectorTLength = compUseSveForVectorT ? 32 : 16; } - compUseSveForVectorT = (compVectorTLength >= compMinVectorTLengthForSve); - //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength; //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b0f2dac082339c..321d280d235ca9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8235,7 +8235,7 @@ class Compiler assert(type != TYP_STRUCT); // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. - return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (varTypeIsSIMDVL(type))); + return ((type == TYP_SIMD16) || (type == TYP_SIMD12) || (UseSveForType(type))); } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE") @@ -8917,7 +8917,7 @@ class Compiler private: static unsigned compVectorTLength; - static unsigned compMinVectorTLengthForSve; + //static unsigned compMinVectorTLengthForSve; static bool compUseSveForVectorT; public: @@ -8929,10 +8929,15 @@ class Compiler { return compUseSveForVectorT; } - FORCEINLINE static bool UseSveForSimdSize(unsigned simdSize) + FORCEINLINE static bool UseSveForType(var_types type) { - return compUseSveForVectorT && (simdSize >= compMinVectorTLengthForSve); + return UseSveForVectorT() && varTypeIsSIMD(type); } + + //FORCEINLINE static bool UseSveForSimdSize(unsigned simdSize) + //{ + // return compUseSveForVectorT && (simdSize >= compMinVectorTLengthForSve); + //} FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize) { return simdSize == compVectorTLength; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9094c61600e9e4..2fc329bd15eefe 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21003,7 +21003,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( op2 = gtNewOperNode(GT_NEG, TYP_INT, op2); } - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); } @@ -22476,7 +22476,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else { - assert(UseSveForSimdSize(simdSize)); + assert(UseSveForVectorT()); intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = @@ -22524,7 +22524,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( { intrinsic = 
NI_Vector128_op_Equality; } - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { intrinsic = NI_Vector_op_Equality; @@ -22686,7 +22686,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); @@ -22735,7 +22735,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } else { - assert(UseSveForSimdSize(simdSize)); + assert(UseSveForVectorT()); intrinsic = NI_Vector_op_Inequality; @@ -22812,7 +22812,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( } return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { intrinsic = NI_Sve_ConditionalSelect; op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); @@ -22822,7 +22822,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( intrinsic = NI_AdvSimd_BitwiseSelect; } - intrinsic = UseSveForSimdSize(simdSize) ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; + intrinsic = UseSveForVectorT() ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #else #error Unsupported platform @@ -27223,7 +27223,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Sve_AddAcross, simdBaseJitType, simdSize); return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 16); @@ -29770,7 +29770,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N #ifdef TARGET_ARM64 //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places - if (Compiler::UseSveForSimdSize(simdSize)) + if (Compiler::UseSveForVectorT()) { switch (id) { @@ -31162,7 +31162,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, id = isScalar ? 
NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; } #elif defined(TARGET_ARM64) - if (Compiler::UseSveForSimdSize(simdSize)) + if (Compiler::UseSveForVectorT()) { id = NI_Sve_CompareNotEqualTo; } @@ -31257,7 +31257,7 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp( case GT_GT: case GT_LT: { - if (Compiler::UseSveForSimdSize(simdSize)) + if (Compiler::UseSveForVectorT()) { lookupType = TYP_MASK; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5f2b65d79d3fee..d910cd9ec8a85e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1672,7 +1672,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_CreateSequence: { - assert(Compiler::UseSveForSimdSize(simdSize)); + assert(Compiler::UseSveForVectorT()); op2 = impPopStack().val; op1 = impPopStack().val; @@ -1682,7 +1682,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_ToScalar: { - if (UseSveForSimdSize(simdSize)) + if (UseSveForVectorT()) { op1 = impSIMDPopStack(); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index bed1ec05d4f1eb..d5478d2359c67c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3872,7 +3872,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI else #endif // TARGET_XARCH #ifdef TARGET_ARM64 - if (varTypeIsSIMDVL(simdType)) + if (UseSveForType(simdType)) { hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_Sve); } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 32601dcdaf64d6..ec06f76198faa2 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1885,7 +1885,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #endif } #ifdef TARGET_ARM64 - else if (varTypeIsSIMDVL(srcType)) + else if (Compiler::UseSveForType(srcType)) { return INS_sve_ldr; } @@ -1979,7 +1979,7 @@ instruction CodeGen::ins_Copy(var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (varTypeIsSIMDVL(dstType)) + else if (Compiler::UseSveForType(dstType)) { return INS_sve_mov; } @@ -2106,7 +2106,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (varTypeIsSIMDVL(dstType)) + else if (Compiler::UseSveForType(dstType)) { return INS_sve_mov; } @@ -2223,7 +2223,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #endif } #ifdef TARGET_ARM64 - else if (varTypeIsSIMDVL(dstType)) + else if (Compiler::UseSveForType(dstType)) { return INS_sve_str; } diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ff1b05f1424e96..18639ac91776f5 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -386,8 +386,7 @@ CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) #endif #if defined(TARGET_ARM64) -RELEASE_CONFIG_INTEGER(MinVectorLengthForSve, "MinVectorLengthForSve", 0x20) // The preferred decimal bytes for VL -CONFIG_INTEGER(FakeVectorLengthForSve, "FakeVectorLengthForSve", 0x10) // The fake decimal bytes for VL (for testing purposes) +CONFIG_INTEGER(UseSveForVectorT, "UseSveForVectorT", 0) // Prefer SVE instructions for VectorT #endif // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 1777644c14baee..0bb60027df2e13 100644 --- a/src/coreclr/jit/lowerarmarch.cpp 
+++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2069,7 +2069,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - assert(varTypeIsSIMDVL(simdType)); + assert(Compiler::UseSveForType(simdType)); assert((intrinsicId == NI_Vector_op_Equality) || (intrinsicId == NI_Vector_op_Inequality)); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b83e6166d277c4..d84a403b4747ba 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -6041,7 +6041,7 @@ void LinearScan::allocateRegisters() lclVarInterval->isPartiallySpilled = true; } #elif defined(TARGET_ARM64) - else if (varTypeIsSIMDVL(lclVarInterval->registerType)) + else if (Compiler::UseSveForType(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -6061,7 +6061,7 @@ void LinearScan::allocateRegisters() { lclVarInterval->isPartiallySpilled = false; #if defined(TARGET_ARM64) - if (varTypeIsSIMDVL(lclVarInterval->registerType)) + if (Compiler::UseSveForType(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -7543,7 +7543,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, // while on x86 we can spill directly to memory. regNumber spillReg = refPosition->assignedReg(); #ifdef TARGET_ARM64 - bool isVariableVL = varTypeIsSIMDVL(varDsc->TypeGet()); + bool isVariableVL = Compiler::UseSveForType(varDsc->TypeGet()); bool spillToMem = refPosition->spillAfter || isVariableVL; assert((spillReg != REG_NA) || isVariableVL); #else @@ -7646,7 +7646,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, simdUpperRestore->gtFlags |= GTF_NOREG_AT_USE; #else simdUpperRestore->gtFlags |= GTF_SPILLED; - assert((refPosition->assignedReg() != REG_NA) || (varTypeIsSIMDVL(restoreLcl->TypeGet()))); + assert((refPosition->assignedReg() != REG_NA) || (Compiler::UseSveForType(restoreLcl->TypeGet()))); restoreReg = refPosition->assignedReg(); #endif } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 417e25682d3deb..fcaa724a9f811a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1484,7 +1484,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, #ifdef TARGET_XARCH forceRegOptional = true; #elif TARGET_ARM64 - forceRegOptional = varTypeIsSIMDVL(tree->TypeGet()); + forceRegOptional = Compiler::UseSveForType(tree->TypeGet()); #endif if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars)) { diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 1239e8cacbfd04..8687551b805218 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -606,7 +606,7 @@ var_types RegSet::tmpNormalizeType(var_types type) } #if defined(TARGET_ARM64) - if (varTypeIsSIMDVL(type)) + if (Compiler::UseSveForType(type)) { //TODO-VL: temporary work around to allow scalable registers type = TYP_SIMD16; @@ -693,7 +693,7 @@ void RegSet::tmpPreAllocateTemps(var_types type, unsigned count) unsigned size = genTypeSize(type); #ifdef TARGET_ARM64 - if (varTypeIsSIMDVL(type)) + if (Compiler::UseSveForType(type)) { size = 16; // SIMD registers overlap with SVE registers } diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 12d72d94ec379c..e214d1f8a346ff 100644 --- a/src/coreclr/jit/vartype.h +++ 
b/src/coreclr/jit/vartype.h @@ -64,19 +64,6 @@ inline var_types TypeGet(var_types v) return v; } -#ifdef TARGET_ARM64 -inline bool varTypeIsSIMDVL(var_types vt) -{ -#ifdef FEATURE_SIMD - return (vt == TYP_SIMD32) || (vt == TYP_SIMD64); -#else - // Always return false if FEATURE_SIMD is not enabled - return false; -#endif -} -#endif // TARGET_ARM64 - - template <class T> inline bool varTypeIsSIMD(T vt) { From abd6e214f9aa2047c54fae8110550f553bc13515 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 May 2025 14:02:09 -0700 Subject: [PATCH 073/120] build failure --- src/coreclr/jit/compiler.cpp | 7 ++++++- src/coreclr/vm/class.cpp | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index f845ddddf055b5..e6a8b9cce38c0f 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -909,11 +909,12 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. if (canReturnInRegister && (useType == TYP_UNKNOWN) && - (structSize <= MAX_PASS_SINGLEREG_BYTES) + ((structSize <= MAX_PASS_SINGLEREG_BYTES) #ifdef TARGET_ARM64 || SizeMatchesVectorTLength(structSize) #endif ) + ) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 707c87c5cd3733..2668b3b7122162 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -1751,7 +1751,7 @@ CorInfoHFAElemType MethodTable::GetHFAType() } else { - assert ("Invalid vectorSize"); + _ASSERTE(!"Invalid vectorSize"); return CORINFO_HFA_ELEM_VECTOR128; } } @@ -1878,7 +1878,7 @@ EEClass::CheckForHFA() #endif // TARGET_ARM64 else { - assert ("Invalid element size %u", thisElemSize); + _ASSERTE(!"Invalid element size"); fieldHFAType = CORINFO_HFA_ELEM_VECTOR128; } } From c212d255571ddde7325c46ef93887d98d7fa1ac2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 May 2025 15:46:48 -0700 Subject: [PATCH 074/120] more build failure fix --- src/coreclr/jit/codegenarm64.cpp | 1 + src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs | 1 - src/coreclr/vm/arm64/profiler.cpp | 2 +- src/coreclr/vm/classlayoutinfo.cpp | 1 - 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 21792d64a69571..b92bcffa566dfe 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2368,6 +2368,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B); } } + break; } case TYP_SIMD64: { diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 974d51a520af23..9e75ee782fab1b 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -719,7 +719,6 @@ public enum CorInfoHFAElemType CORINFO_HFA_ELEM_DOUBLE, CORINFO_HFA_ELEM_VECTOR64,
CORINFO_HFA_ELEM_VECTOR128, - CORINFO_HFA_ELEM_VECTOR_VL, } /* data to optimize delegate construction */ diff --git a/src/coreclr/vm/arm64/profiler.cpp b/src/coreclr/vm/arm64/profiler.cpp index f454f689eee721..8adaeb283c1eae 100644 --- a/src/coreclr/vm/arm64/profiler.cpp +++ b/src/coreclr/vm/arm64/profiler.cpp @@ -282,7 +282,7 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void) { CorInfoHFAElemType hfaElemType = thReturnValueType.GetHFAType(); - if ((hfaElemType == CORINFO_HFA_ELEM_VECTOR128) || (hfaElemType == CORINFO_HFA_ELEM_VECTOR_VL)) + if (hfaElemType == CORINFO_HFA_ELEM_VECTOR128) { return &pData->floatArgumentRegisters.q[0]; } diff --git a/src/coreclr/vm/classlayoutinfo.cpp b/src/coreclr/vm/classlayoutinfo.cpp index 87dc3201c3fe7a..0b0b54005cdba3 100644 --- a/src/coreclr/vm/classlayoutinfo.cpp +++ b/src/coreclr/vm/classlayoutinfo.cpp @@ -1190,7 +1190,6 @@ CorInfoHFAElemType EEClassNativeLayoutInfo::GetNativeHFATypeRaw() const #ifdef TARGET_ARM64 case CORINFO_HFA_ELEM_VECTOR64: elemSize = 8; break; case CORINFO_HFA_ELEM_VECTOR128: elemSize = 16; break; - case CORINFO_HFA_ELEM_VECTOR_VL: elemSize = g_sve_length; break; //TODO-VL: Need to cache this #endif default: _ASSERTE(!"Invalid HFA Type"); } From 7b11bebd7695e18d8f5befef0dd318cd0557a8c4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 May 2025 17:01:39 -0700 Subject: [PATCH 075/120] more build failure --- src/coreclr/vm/codeman.cpp | 1 - src/coreclr/vm/codeman.h | 4 ---- src/coreclr/vm/jitinterface.cpp | 4 ++-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 9fc46cfa2d2ee3..06ab31bc0e4ffc 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1519,7 +1519,6 @@ void EEJitManager::SetCpuInfo() { uint32_t maxVectorTLength = (maxVectorTBitWidth / 8); uint64_t sveLengthFromOS = GetSveLengthFromOS(); - g_sve_length = sveLengthFromOS; // For now, enable SVE only when the system vector length is 16 bytes (128-bits) // TODO: https://github.com/dotnet/runtime/issues/101477 diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index c4a5515c01c1b7..5d5a0dc50d2b1b 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -91,10 +91,6 @@ class EECodeInfo; #define ROUND_DOWN_TO_PAGE(x) ( (size_t) (x) & ~((size_t)GetOsPageSize()-1)) #define ROUND_UP_TO_PAGE(x) (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1)) -#ifdef TARGET_ARM64 -extern uint64_t g_sve_length; -#endif - enum StubCodeBlockKind : int { STUB_CODE_BLOCK_UNKNOWN = 0, diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index c1a224fa358d58..fce691b0ef8e44 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11897,7 +11897,7 @@ uint32_t CEEJitInfo::getTargetVectorLength() CORJIT_FLAGS corjitFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); if (corjitFlags.IsSet(InstructionSet_Sve) || corjitFlags.IsSet(InstructionSet_Sve_Arm64)) { - return GetSveLengthFromOS(); + return (uint32_t)GetSveLengthFromOS(); } else if (corjitFlags.IsSet(InstructionSet_AdvSimd) || corjitFlags.IsSet(InstructionSet_AdvSimd_Arm64)) { @@ -15051,7 +15051,7 @@ uint32_t CEEInfo::getTargetVectorLength() CORJIT_FLAGS corjitFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); if (corjitFlags.IsSet(InstructionSet_Sve) || corjitFlags.IsSet(InstructionSet_Sve_Arm64)) { - return GetSveLengthFromOS(); + return (uint32_t)GetSveLengthFromOS(); } else if 
(corjitFlags.IsSet(InstructionSet_AdvSimd) || corjitFlags.IsSet(InstructionSet_AdvSimd_Arm64)) { From 5dcd5e91b8d43ce17072fdb4f5c6bc2466973eb5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 22 May 2025 16:18:39 -0700 Subject: [PATCH 076/120] Handle vector length in methodtablebuilder --- src/coreclr/inc/clrconfigvalues.h | 2 ++ src/coreclr/vm/methodtablebuilder.cpp | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 3658e5c5c61650..e803bec351eff0 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -285,6 +285,8 @@ CONFIG_DWORD_INFO(INTERNAL_GCUseGlobalAllocationContext, W("GCUseGlobalAllocatio /// CONFIG_DWORD_INFO(INTERNAL_JitBreakEmit, W("JitBreakEmit"), (DWORD)-1, "") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitDebuggable, W("JitDebuggable"), 0, "If set, suppress JIT optimizations that make debugging code difficult") +CONFIG_DWORD_INFO(INTERNAL_UseSveForVectorT, W("UseSveForVectorT"), 0, "Prefer SVE instructions for VectorT") + #if !defined(DEBUG) && !defined(_DEBUG) #define INTERNAL_JitEnableNoWayAssert_Default 0 #else diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 31219baaccc8b9..e989d57f474c33 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1159,6 +1159,10 @@ MethodTableBuilder::CopyParentVtable() } } +#ifdef TARGET_ARM64 +extern "C" uint64_t GetSveLengthFromOS(); +#endif + //******************************************************************************* // Determine if this is the special SIMD type System.Numerics.Vector, whose // size is determined dynamically based on the hardware and the presence of JIT @@ -1203,10 +1207,17 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() #elif defined(TARGET_ARM64) if (CPUCompileFlags.IsSet(InstructionSet_Sve_Arm64)) { - // TODO-VL: This should use GetSveLengthFromOS() - // Probably use CLRConfig::XXX environment variable - // for testing - numInstanceFieldBytes = 32; +#ifdef _DEBUG + if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseSveForVectorT) != 0) + { + // For testing purpose, pretend the vector length is 32 bytes + numInstanceFieldBytes = 32; + } + else +#endif + { + numInstanceFieldBytes = (uint32_t)GetSveLengthFromOS(); + } } #endif // TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 From c6c667118ba87bce878f43468e67840071a8b768 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 22 May 2025 23:20:49 -0700 Subject: [PATCH 077/120] simplify the logic of UseSveForVectorT --- src/coreclr/jit/compiler.cpp | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e6a8b9cce38c0f..d0bc53bcff4c62 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2609,30 +2609,20 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #if defined(TARGET_ARM64) - if (info.compMatchedVM) - { - compVectorTLength = info.compCompHnd->getTargetVectorLength(); - CORINFO_InstructionSetFlags instructionSetFlags = jitFlags->GetInstructionSetFlags(); - if (!instructionSetFlags.HasInstructionSet(InstructionSet_Sve) && !instructionSetFlags.HasInstructionSet(InstructionSet_Sve_Arm64)) - { - compVectorTLength = UINT_MAX; - compUseSveForVectorT = false; - } - else - { - compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); - } +#ifdef DEBUG + compUseSveForVectorT = 
JitConfig.UseSveForVectorT(); + if (compUseSveForVectorT) + { + // In test mode, if UseSveForVectorT=1, then mimic that + // we are generating for VL > 16B + compVectorTLength = 32; } else +#endif // DEBUG { - // For altjit, use the 32B if we want to test SVE for VectorT, otherwise 16B -#ifdef DEBUG - compUseSveForVectorT = JitConfig.UseSveForVectorT(); -#else - compUseSveForVectorT = false; -#endif - compVectorTLength = compUseSveForVectorT ? 32 : 16; + compVectorTLength = info.compCompHnd->getTargetVectorLength(); + compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); } //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; From a4d5a9b363db0be02ca5dc440f5c68e66633c8cf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 23 May 2025 10:32:23 -0700 Subject: [PATCH 078/120] minor cleanup --- src/coreclr/jit/compiler.cpp | 9 ++------- src/coreclr/jit/compiler.h | 14 +++++++------- src/coreclr/jit/compiler.hpp | 4 ++-- src/coreclr/jit/emit.cpp | 4 ++-- src/coreclr/jit/emit.h | 4 ++-- src/coreclr/jit/gentree.cpp | 2 -- src/coreclr/jit/typelist.h | 3 --- src/coreclr/vm/codeman.h | 1 + 8 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d0bc53bcff4c62..555a3599d3562f 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -104,7 +104,7 @@ inline bool _our_GetThreadCycles(uint64_t* cycleOut) #endif // which host OS -BYTE genTypeSizes[] = { +const BYTE genTypeSizes[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz, #include "typelist.h" #undef DEF_TP @@ -116,7 +116,7 @@ const BYTE genTypeAlignments[] = { #undef DEF_TP }; -BYTE genTypeStSzs[] = { +const BYTE genTypeStSzs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) st, #include "typelist.h" #undef DEF_TP @@ -2624,11 +2624,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) compVectorTLength = info.compCompHnd->getTargetVectorLength(); compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); } - - //genTypeSizes[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength; - //emitTypeSizes[TYP_SIMDVL] = (unsigned short)Compiler::compVectorTLength; - //emitTypeActSz[TYP_SIMDVL] = EA_SCALABLE; - //genTypeStSzs[TYP_SIMDVL] = (BYTE)Compiler::compVectorTLength / sizeof(int); #endif // TARGET_ARM64 bool enableInliningMethodsWithEH = JitConfig.JitInlineMethodsWithEH() > 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 321d280d235ca9..48f78894438261 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8934,10 +8934,6 @@ class Compiler return UseSveForVectorT() && varTypeIsSIMD(type); } - //FORCEINLINE static bool UseSveForSimdSize(unsigned simdSize) - //{ - // return compUseSveForVectorT && (simdSize >= compMinVectorTLengthForSve); - //} FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize) { return simdSize == compVectorTLength; @@ -9282,6 +9278,10 @@ class Compiler return XMM_REGSIZE_BYTES; } #elif defined(TARGET_ARM64) + // TODO-VL: There are several optimizations that use this method + // to decide to use higher vector length. E.g. ReadUtf8, Memmove, etc. + // To make them functional, some of them need SVE2 intrinsics/instructions. + // We will incrementally enable them as we add support for SVE2 APIs. 
//if (compExactlyDependsOn(InstructionSet_Sve_Arm64)) //{ // return Compiler::compVectorTLength; @@ -9667,7 +9667,7 @@ class Compiler } private: - // Returns true if the TYP_SIMDVL locals on stack are aligned at their + // Returns true if the TYP_SIMD locals on stack are aligned at their // preferred byte boundary specified by getSIMDTypeAlignment(). // // As per the Intel manual, the preferred alignment for AVX vectors is @@ -12678,9 +12678,9 @@ const instruction INS_BREAKPOINT = INS_ebreak; /*****************************************************************************/ -extern BYTE genTypeSizes[]; +extern const BYTE genTypeSizes[]; extern const BYTE genTypeAlignments[]; -extern BYTE genTypeStSzs[]; +extern const BYTE genTypeStSzs[]; extern const BYTE genActualTypes[]; /*****************************************************************************/ diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 017ab832236d45..225594d153860a 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1139,7 +1139,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) * Return the size in bytes of the given type. */ -extern BYTE genTypeSizes[TYP_COUNT]; +extern const BYTE genTypeSizes[TYP_COUNT]; template inline unsigned genTypeSize(T value) @@ -1155,7 +1155,7 @@ inline unsigned genTypeSize(T value) * returns 1 for 32-bit types and 2 for 64-bit types. */ -extern BYTE genTypeStSzs[TYP_COUNT]; +extern const BYTE genTypeStSzs[TYP_COUNT]; template inline unsigned genTypeStSz(T value) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 2cbc9a027c1bc9..c823ebb1ade3b3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -595,13 +595,13 @@ void emitterStats(FILE* fout) /*****************************************************************************/ -unsigned short emitTypeSizes[] = { +const unsigned short emitTypeSizes[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sze, #include "typelist.h" #undef DEF_TP }; -unsigned short emitTypeActSz[] = { +const unsigned short emitTypeActSz[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) asze, #include "typelist.h" #undef DEF_TP diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 4f1d26ba10218a..649c8b888a4ac6 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3797,7 +3797,7 @@ inline unsigned emitter::emitSpecifiedOffset(unsigned insCount, unsigned igSize) return codePos; } -extern unsigned short emitTypeSizes[TYP_COUNT]; +extern const unsigned short emitTypeSizes[TYP_COUNT]; template inline emitAttr emitTypeSize(T type) @@ -3807,7 +3807,7 @@ inline emitAttr emitTypeSize(T type) return (emitAttr)emitTypeSizes[TypeGet(type)]; } -extern unsigned short emitTypeActSz[TYP_COUNT]; +extern const unsigned short emitTypeActSz[TYP_COUNT]; template inline emitAttr emitActualTypeSize(T type) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2fc329bd15eefe..685281cfcc02d7 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22514,8 +22514,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( // We want to generate a comparison along the lines of // GT_XX(op1, op2).As() == Vector128.AllBitsSet - // TODO-VL: Such checks might not work for DOTNET_MinVectorForSve, where we - // set DOTNET_MinVectorForSve=16 for testing purposes. 
if (simdSize == 8) { intrinsic = NI_Vector64_op_Equality; diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 2f1f22fcf29521..ac8fbafa7acb10 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -60,9 +60,6 @@ DEF_TP(STRUCT ,"struct" , TYP_STRUCT, 0, 0, 0, 1, 4, VTR_INT, available DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, 8, 8, 8, 2, 8, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, 12,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) -#if defined(TARGET_ARM64) -//DEF_TP(SIMDVL ,"simdVL" , TYP_SIMDVL, -1,-1, -1, -1,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) -#endif // TARGET_ARM64 #if defined(TARGET_XARCH) || defined(TARGET_ARM64) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 5d5a0dc50d2b1b..edad07c4341f8b 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -91,6 +91,7 @@ class EECodeInfo; #define ROUND_DOWN_TO_PAGE(x) ( (size_t) (x) & ~((size_t)GetOsPageSize()-1)) #define ROUND_UP_TO_PAGE(x) (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1)) + enum StubCodeBlockKind : int { STUB_CODE_BLOCK_UNKNOWN = 0, From c2e5c23f9d1d3e4575f2060bf33e287a07cc9233 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 25 May 2025 08:07:54 -0700 Subject: [PATCH 079/120] jit format --- src/coreclr/jit/codegenarm64.cpp | 19 +++++++-------- src/coreclr/jit/compiler.cpp | 8 +++---- src/coreclr/jit/compiler.h | 26 ++++++++++----------- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/gentree.cpp | 22 ++++++++--------- src/coreclr/jit/hwintrinsicarm64.cpp | 13 +++++------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 25 +++++++++++++------- src/coreclr/jit/importer.cpp | 4 ++-- src/coreclr/jit/importercalls.cpp | 3 ++- src/coreclr/jit/instr.cpp | 3 ++- src/coreclr/jit/lower.cpp | 6 ++--- src/coreclr/jit/lowerarmarch.cpp | 15 +++++++----- src/coreclr/jit/lsra.cpp | 4 ++-- src/coreclr/jit/regset.cpp | 3 +-- 14 files changed, 81 insertions(+), 72 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index b92bcffa566dfe..39ddf5753ea73e 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2364,8 +2364,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre CORINFO_FIELD_HANDLE hnd; hnd = emit->emitSimdConst(&vecCon->gtSimdVal, emitTypeSize(tree->TypeGet())); emit->emitIns_R_C(INS_sve_ldr, attr, targetReg, addrReg, hnd, 0); - //emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0); - //emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, INS_OPTS_SCALABLE_B); + // emit->emitIns_R_C(INS_adr, EA_8BYTE, addrReg, REG_NA, hnd, 0); + // emit->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, targetReg, REG_P1, addrReg, 0, + // INS_OPTS_SCALABLE_B); } } break; @@ -2389,18 +2390,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre if 
(ElementsAreSame(val.i32, 16) && emitter::isValidSimm_MultipleOf<8, 256>(val.i32[0])) { emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i32[0], INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_IMM_BITMASK); + INS_SCALABLE_OPTS_IMM_BITMASK); } - else if (ElementsAreSame(val.i16, 32) && - emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) + else if (ElementsAreSame(val.i16, 32) && emitter::isValidSimm_MultipleOf<8, 256>(val.i16[0])) { emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i16[0], INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_IMM_BITMASK); + INS_SCALABLE_OPTS_IMM_BITMASK); } else if (ElementsAreSame(val.i8, 64) && emitter::isValidSimm<8>(val.i8[0])) { emit->emitIns_R_I(INS_sve_mov, EA_SCALABLE, targetReg, val.i8[0], INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_IMM_BITMASK); + INS_SCALABLE_OPTS_IMM_BITMASK); } else { @@ -3043,7 +3043,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) emitAttr attr = emitActualTypeSize(targetType); if (attr == EA_SCALABLE) { - //TODO-VL: Should we check the baseType or it doesn't matter because it is just reg->reg move + // TODO-VL: Should we check the baseType or it doesn't matter because it is just reg->reg move GetEmitter()->emitIns_Mov(INS_sve_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired, INS_OPTS_SCALABLE_Q); } @@ -3051,7 +3051,6 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) { GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired); } - } /*********************************************************************************************** @@ -5415,7 +5414,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) GenTreeLclVar* lclNode = op1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); - unsigned varSize = emitTypeSize(varDsc->GetRegisterType(lclNode)); + unsigned varSize = emitTypeSize(varDsc->GetRegisterType(lclNode)); assert((varSize == 16) || (Compiler::SizeMatchesVectorTLength(varSize))); regNumber srcReg = node->GetRegNum(); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index c92f9d93fe51fc..aca92911e46530 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -914,8 +914,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_ARM64 || SizeMatchesVectorTLength(structSize) #endif - ) - ) + )) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one @@ -2164,7 +2163,7 @@ unsigned ReinterpretHexAsDecimal(unsigned in) #ifdef TARGET_ARM64 unsigned Compiler::compVectorTLength = 0; -//unsigned Compiler::compMinVectorTLengthForSve = 0; +// unsigned Compiler::compMinVectorTLengthForSve = 0; bool Compiler::compUseSveForVectorT = false; #endif @@ -2608,7 +2607,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #endif // DEBUG } - #if defined(TARGET_ARM64) #ifdef DEBUG @@ -2622,7 +2620,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) else #endif // DEBUG { - compVectorTLength = info.compCompHnd->getTargetVectorLength(); + compVectorTLength = info.compCompHnd->getTargetVectorLength(); compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); } #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6eedefde93f60d..2eff74fe8a21e4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8931,7 +8931,7 @@ class Compiler private: static unsigned compVectorTLength; - //static unsigned compMinVectorTLengthForSve; + // 
static unsigned compMinVectorTLengthForSve; static bool compUseSveForVectorT; public: @@ -9296,11 +9296,11 @@ class Compiler // to decide to use higher vector length. E.g. ReadUtf8, Memmove, etc. // To make them functional, some of them need SVE2 intrinsics/instructions. // We will incrementally enable them as we add support for SVE2 APIs. - //if (compExactlyDependsOn(InstructionSet_Sve_Arm64)) + // if (compExactlyDependsOn(InstructionSet_Sve_Arm64)) //{ // return Compiler::compVectorTLength; //} - //else + // else if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) { return FP_REGSIZE_BYTES; @@ -9409,16 +9409,16 @@ class Compiler // Return 0 if size is even less than XMM, otherwise - XMM return (size >= XMM_REGSIZE_BYTES) ? XMM_REGSIZE_BYTES : 0; #elif defined(TARGET_ARM64) - //if (FP_REGSIZE_BYTES < Compiler::compVectorTLength) + // if (FP_REGSIZE_BYTES < Compiler::compVectorTLength) //{ - // if (size >= Compiler::compVectorTLength) - // { - // return Compiler::compVectorTLength; - // } - //} - //else - //TODO-VL: For now, disable most of the optimizations like memmove, struct copy, - // etc. for VL + // if (size >= Compiler::compVectorTLength) + // { + // return Compiler::compVectorTLength; + // } + // } + // else + // TODO-VL: For now, disable most of the optimizations like memmove, struct copy, + // etc. for VL { assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES); } @@ -9591,7 +9591,7 @@ class Compiler #if defined(TARGET_ARM64) // For now, just use SIMD register size for unroll threshold // decisions - //maxRegSize = getPreferredVectorByteLength(); + // maxRegSize = getPreferredVectorByteLength(); maxRegSize = FP_REGSIZE_BYTES; #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 649c8b888a4ac6..45cbdcfa6df2b2 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2661,7 +2661,7 @@ class emitter CORINFO_FIELD_HANDLE emitSimdConst(simd_t* constValue, emitAttr attr); #endif // TARGET_XARCH || TARGET_ARM64 #if defined(TARGET_XARCH) - void emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg); + void emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, regNumber targetReg); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) CORINFO_FIELD_HANDLE emitSimdMaskConst(simdmask_t constValue); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a9bff89b274873..2f6c6e429bf3c4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20995,7 +20995,8 @@ GenTree* Compiler::gtNewSimdBinOpNode( if (UseSveForVectorT()) { - op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + op2 = + gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); } else { @@ -22489,9 +22490,9 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( // However, NOT() operation only operates on "byte" variant i.e. `p1.B`, while the result of `p1` from // `SVE_CMP_CC` can be of other variants like `p1.S` or `p1.D`, etc. 
GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, - simdBaseJitType, simdSize); - op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, + simdBaseJitType, simdSize); + op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); } break; } @@ -22738,8 +22739,8 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( // if r1 != 0 return true else false GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, - simdBaseJitType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, + simdBaseJitType, simdSize); op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); } @@ -22803,7 +22804,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( if (UseSveForVectorT()) { intrinsic = NI_Sve_ConditionalSelect; - op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); + op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); } else { @@ -29185,7 +29186,7 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) // genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar) { - //TODO-VL: Update this method with SVE_ intrinsics as well + // TODO-VL: Update this method with SVE_ intrinsics as well *isScalar = false; switch (id) @@ -29747,7 +29748,6 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } } - //------------------------------------------------------------------------------ // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable // @@ -29756,8 +29756,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N NamedIntrinsic sveId = id; #ifdef TARGET_ARM64 - //TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places - + // TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places + if (Compiler::UseSveForVectorT()) { switch (id) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 87d64820355d56..17e619e0f43047 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1674,8 +1674,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(Compiler::UseSveForVectorT()); - op2 = impPopStack().val; - op1 = impPopStack().val; + op2 = impPopStack().val; + op1 = impPopStack().val; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); break; } @@ -1704,8 +1704,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_get_Indices: { GenTree* start = gtNewIconNode(0, TYP_INT); - GenTree* step = gtNewIconNode(1, TYP_INT); - retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); + GenTree* step = gtNewIconNode(1, TYP_INT); + retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); break; } case NI_Vector64_get_Indices: @@ -2146,7 +2146,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Max: - //case NI_Vector_MaxNumber: + // case NI_Vector_MaxNumber: case NI_Vector64_Max: case 
NI_Vector128_Max: { @@ -2178,7 +2178,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Min: - //case NI_Vector_MinNumber: + // case NI_Vector_MinNumber: case NI_Vector64_Min: case NI_Vector128_Min: { @@ -3539,7 +3539,6 @@ GenTree* Compiler::gtNewSimdAllFalseMaskNode(CorInfoType simdBaseJitType, unsign return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskAll, simdBaseJitType, simdSize); } - //------------------------------------------------------------------------ // gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false // diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index c9b2b87fae9196..b36c064982e55d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2663,26 +2663,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if ((op1Reg == REG_NA) && (op2Reg == REG_NA)) { int start = (int)intrin.op1->AsIntCon()->gtIconVal; - int step = (int)intrin.op2->AsIntCon()->gtIconVal; - GetEmitter()->emitInsSve_R_I_I(ins, EA_SCALABLE, targetReg, start, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + int step = (int)intrin.op2->AsIntCon()->gtIconVal; + GetEmitter()->emitInsSve_R_I_I(ins, EA_SCALABLE, targetReg, start, step, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else if ((op1Reg != REG_NA) && (op2Reg != REG_NA)) { emitAttr scalarSize = emitActualTypeSize(node->GetSimdBaseType()); - GetEmitter()->emitInsSve_R_R_R(ins, scalarSize, targetReg, op1Reg, op2Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitInsSve_R_R_R(ins, scalarSize, targetReg, op1Reg, op2Reg, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else if (op1Reg != REG_NA) { assert(op2Reg == REG_NA); int step = (int)intrin.op2->AsIntCon()->gtIconVal; - GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op1Reg, step, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op1Reg, step, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else { assert(op1Reg == REG_NA); int start = (int)intrin.op1->AsIntCon()->gtIconVal; - GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op2Reg, start, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)), INS_SCALABLE_OPTS_IMM_FIRST); + GetEmitter()->emitInsSve_R_R_I(ins, EA_SCALABLE, targetReg, op2Reg, start, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)), + INS_SCALABLE_OPTS_IMM_FIRST); } break; } @@ -2690,17 +2695,21 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { if (op1Reg == REG_NA) { - GetEmitter()->emitIns_R_I(ins, emitTypeSize(intrin.baseType), targetReg, intrin.op1->AsIntCon()->IconValue(), emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitIns_R_I(ins, emitTypeSize(intrin.baseType), targetReg, + intrin.op1->AsIntCon()->IconValue(), + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else { if (varTypeIsIntegral(intrin.op1)) { - GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } else { - GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, 0, emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); + 
GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, 0, + emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType))); } } break; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f61c3aec923b8f..243705f93e6aad 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3906,7 +3906,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI else #endif // TARGET_XARCH #ifdef TARGET_ARM64 - if (UseSveForType(simdType)) + if (UseSveForType(simdType)) { hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_Sve); } @@ -6909,7 +6909,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (genActualType(lclTyp) == TYP_I_IMPL) { impBashVarAddrsToI(op1); - } + } // If this is a local and the local is a ref type, see // if we can improve type information based on the diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 1b245dfba11f1f..800080790f5556 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -10927,7 +10927,8 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) const char* lookupMethodName = methodName; - if (!useSizeAgnosticVector && ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0'))) + if (!useSizeAgnosticVector && + ((strncmp(methodName, "As", 2) == 0) && (methodName[2] != '\0'))) { if (strncmp(methodName + 2, "Vector", 6) == 0) { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index ec06f76198faa2..df51afb61c5741 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -535,7 +535,8 @@ void CodeGen::inst_Mov(var_types dstType, #ifdef TARGET_ARM GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); #elif defined(TARGET_ARM64) - GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, size == EA_SCALABLE ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE); + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, + size == EA_SCALABLE ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE); #else GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip); #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 94d8dcfff6f35b..f8cf8c4777e43f 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -9961,9 +9961,9 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) case TYP_SIMD16: tryReusingPrevValue = true; break; -#endif // TARGET_AMD64 -#endif // FEATURE_HW_INTRINSICS -#endif // TARGET_64BIT +#endif // TARGET_AMD64 +#endif // FEATURE_HW_INTRINSICS +#endif // TARGET_64BIT // TYP_FLOAT and TYP_DOUBLE aren't needed here - they're expected to // be converted to TYP_INT/TYP_LONG for constant value. 
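[Note on the comparison lowering in the gentree.cpp hunk above and the lowerarmarch.cpp hunk that follows: with SVE, Vector<T> op_Equality/op_Inequality are reduced to counting active predicate lanes (NI_Sve_GetActiveElementCount, i.e. CNTP) rather than moving a mask into a general register. A minimal ACLE sketch of the same shape, as a standalone illustration only — this is not JIT code; svcmpne/svcntp/svptrue are the C-level counterparts of the intrinsics named in the hunks:

    #include <arm_sve.h>

    // All-lanes equality for two scalable int32 vectors, mirroring the IR
    // built by gtNewSimdCmpOpAllNode: compare, then count differing lanes
    // under an all-true governing predicate.
    static bool AllLanesEqual(svint32_t left, svint32_t right)
    {
        svbool_t allTrue = svptrue_b32();                      // CreateTrueMaskAll
        svbool_t notEq   = svcmpne_s32(allTrue, left, right);  // CompareNotEqualTo
        return svcntp_b32(allTrue, notEq) == 0;                // GetActiveElementCount
    }

The any-lane form (op_Inequality) is the same pattern compared against zero with != instead of ==, which is the GT_EQ/GT_NE node that LowerHWIntrinsicCmpOpVL builds in the next hunk.]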
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 18688bd3a78cc4..74d57ac374ed83 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2194,10 +2194,10 @@ GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps GenTree* cmpResult = node->Op(1); LowerNode(cmpResult); - GenTree* allTrue = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); + GenTree* allTrue = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); GenTree* activeElemCnt = comp->gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, - simdBaseJitType, simdSize); - GenTree* cntNode = comp->gtNewIconNode(0, TYP_LONG); + simdBaseJitType, simdSize); + GenTree* cntNode = comp->gtNewIconNode(0, TYP_LONG); BlockRange().InsertBefore(node, allTrue); BlockRange().InsertBefore(node, activeElemCnt); BlockRange().InsertBefore(node, cntNode); @@ -2208,7 +2208,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps LowerNode(cmp); node->ChangeOper(cmpOp); - node->gtType = TYP_INT; + node->gtType = TYP_INT; node->AsOp()->gtOp1 = activeElemCnt; node->AsOp()->gtOp2 = cntNode; @@ -4296,7 +4296,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_ShiftLeftLogicalImm: { assert(!hasImmediateOperand); - if (intrin.op2->IsCnsIntOrI() && emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), emitTypeSize(intrin.baseType), false)) + if (intrin.op2->IsCnsIntOrI() && + emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), + emitTypeSize(intrin.baseType), false)) { MakeSrcContained(node, intrin.op2); } @@ -4306,7 +4308,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_ShiftRightLogicalImm: { assert(!hasImmediateOperand); - if (intrin.op2->IsCnsIntOrI() && emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), emitTypeSize(intrin.baseType), true)) + if (intrin.op2->IsCnsIntOrI() && emitter::isValidVectorShiftAmount(intrin.op2->AsIntCon()->IconValue(), + emitTypeSize(intrin.baseType), true)) { MakeSrcContained(node, intrin.op2); } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d84a403b4747ba..199b76a2c20da8 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -10782,9 +10782,9 @@ void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDes { if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED) { - + #ifdef TARGET_ARM64 -//TODO-VL: Evaluate this + // TODO-VL: Evaluate this assert(tree->gtHasReg(compiler) || (tree->OperIs(GT_INTRINSIC) && (tree->AsIntrinsic()->gtIntrinsicName == NI_SIMD_UpperRestore))); #else diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 8687551b805218..9bfa2da2731b05 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -608,14 +608,13 @@ var_types RegSet::tmpNormalizeType(var_types type) #if defined(TARGET_ARM64) if (Compiler::UseSveForType(type)) { - //TODO-VL: temporary work around to allow scalable registers + // TODO-VL: temporary work around to allow scalable registers type = TYP_SIMD16; } #endif #endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT) - return type; } From be418ae8dbffdf696c9be6347874cfa7b5f93bf4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 May 2025 10:27:32 -0700 Subject: [PATCH 080/120] resolve merge conflict --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- 
.../tools/Common/JitInterface/CorInfoImpl_generated.cs | 10 ++++------ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 3827142a22addf..12468e087b1f5a 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* dd603e43-c783-40e0-b7da-42585a9befb7 */ - 0xdd603e43, - 0xc783, - 0x40e0, - {0xb7, 0xda, 0x42, 0x58, 0x5a, 0x9b, 0xef, 0xb7} +constexpr GUID JITEEVersionIdentifier = { /* 49287d16-74bd-42e9-9d47-132d7a5f67eb */ + 0x49287d16, + 0x74bd, + 0x42e9, + {0x9d, 0x47, 0x13, 0x2d, 0x7a, 0x5f, 0x67, 0xeb} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index 9cd41210143449..af3680dceecd2d 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -2622,7 +2622,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 179); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 177); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage; @@ -2798,11 +2798,9 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[171] = (delegate* unmanaged)&_recordRelocation; callbacks[172] = (delegate* unmanaged)&_getRelocTypeHint; callbacks[173] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[174] = (delegate* unmanaged)&_getJitFlags; - callbacks[175] = (delegate* unmanaged)&_getSpecialCopyHelper; - callbacks[176] = (delegate* unmanaged)&_getTargetVectorLength; - callbacks[177] = (delegate* unmanaged)&_getJitFlags; - callbacks[178] = (delegate* unmanaged)&_getSpecialCopyHelper; + callbacks[174] = (delegate* unmanaged)&_getTargetVectorLength; + callbacks[175] = (delegate* unmanaged)&_getJitFlags; + callbacks[176] = (delegate* unmanaged)&_getSpecialCopyHelper; return (IntPtr)callbacks; } From 1a331023ef567ea5c7da2c9727dc55f766f1b5b7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 May 2025 09:55:00 -0700 Subject: [PATCH 081/120] Do some tracking of simdType --- src/coreclr/jit/codegenarm64.cpp | 4 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.cpp | 59 ++++++++++++++-------------- src/coreclr/jit/gentree.h | 3 +- src/coreclr/jit/hwintrinsicarm64.cpp | 6 +-- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 39ddf5753ea73e..5c263549aa696b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5352,7 +5352,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node) regNumber tgtReg = node->GetRegNum(); #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. - if (Compiler::UseSveForVectorT()) + if (Compiler::UseSveForType(op1->TypeGet())) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the @@ -5432,7 +5432,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node) #ifdef TARGET_ARM64 // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc. 
- if (Compiler::UseSveForVectorT()) + if (Compiler::UseSveForType(op1->TypeGet())) { // Until we custom ABI for SVE, we will just store entire contents of Z* registers // on stack. If we don't do it, we will need multiple free registers to save the diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fadec1c2ec47fa..96351fd4f48fbf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8946,7 +8946,7 @@ class Compiler } FORCEINLINE static bool UseSveForType(var_types type) { - return UseSveForVectorT() && varTypeIsSIMD(type); + return UseSveForVectorT() && ((type == TYP_SIMD32) || (type == TYP_SIMD64)); } FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9d042a828f80c9..ae2f02b43f97a8 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20911,7 +20911,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_AbsScalar : NI_AdvSimd_Arm64_Abs; } - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -21014,7 +21014,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( op2 = gtNewOperNode(GT_NEG, TYP_INT, op2); } - if (UseSveForVectorT()) + if (UseSveForType(type)) { op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); @@ -21610,7 +21610,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -22037,7 +22037,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, hwIntrinsicID); + hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, hwIntrinsicID); assert(hwIntrinsicID != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize); } @@ -22481,7 +22481,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else { - assert(UseSveForVectorT()); + assert(UseSveForType(simdType)); intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = @@ -22527,7 +22527,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( { intrinsic = NI_Vector128_op_Equality; } - if (UseSveForVectorT()) + if (UseSveForType(simdType)) { intrinsic = NI_Vector_op_Equality; @@ -22583,7 +22583,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } } - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -22689,7 +22689,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( intrinsic = (simdSize == 8) ? 
NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - if (UseSveForVectorT()) + if (UseSveForType(simdType)) { GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); @@ -22738,7 +22738,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } else { - assert(UseSveForVectorT()); + assert(UseSveForType(simdType)); intrinsic = NI_Vector_op_Inequality; @@ -22770,7 +22770,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } } - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -22815,7 +22815,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( } return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (UseSveForVectorT()) + if (UseSveForType(type)) { intrinsic = NI_Sve_ConditionalSelect; op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); @@ -22825,7 +22825,7 @@ GenTree* Compiler::gtNewSimdCndSelNode( intrinsic = NI_AdvSimd_BitwiseSelect; } - intrinsic = UseSveForVectorT() ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; + intrinsic = UseSveForType(type) ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #else #error Unsupported platform @@ -23479,7 +23479,7 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -23536,7 +23536,7 @@ GenTree* Compiler::gtNewSimdFmaNode( #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); @@ -24776,7 +24776,7 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( if (intrinsic != NI_Illegal) { - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -25039,7 +25039,7 @@ GenTree* Compiler::gtNewSimdMinNativeNode( if (intrinsic != NI_Illegal) { - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -25621,7 +25621,7 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -27226,7 +27226,7 @@ GenTree* 
Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (UseSveForVectorT()) + if (UseSveForType(type)) { tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Sve_AddAcross, simdBaseJitType, simdSize); return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 16); @@ -27481,7 +27481,7 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -27785,7 +27785,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo intrinsic = NI_AdvSimd_ZeroExtendWideningLower; } - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsic, simdBaseJitType, 8); @@ -27999,7 +27999,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo intrinsic = NI_AdvSimd_ZeroExtendWideningUpper; } - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } @@ -29726,14 +29726,15 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty //------------------------------------------------------------------------------ // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable // -NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) +//NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) +NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id) { NamedIntrinsic sveId = id; #ifdef TARGET_ARM64 // TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places - if (Compiler::UseSveForVectorT()) + if (Compiler::UseSveForType(simdType)) { switch (id) { @@ -29874,7 +29875,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, N } } // Make sure if we are using VL SIMD, we are not generating AdvSimd/NEON intrinsics - assert((simdSize <= 16) || (sveId < FIRST_NI_AdvSimd) || (sveId > LAST_NI_AdvSimd)); + assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD16) || (sveId < FIRST_NI_AdvSimd) || (sveId > LAST_NI_AdvSimd)); #endif // TARGET_ARM64 return sveId; @@ -29973,7 +29974,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( } } - id = GetScalableHWIntrinsicId(simdSize, id); + id = GetScalableHWIntrinsicId(simdType, id); return id; } @@ -30765,7 +30766,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } } - id = GetScalableHWIntrinsicId(simdSize, id); + id = GetScalableHWIntrinsicId(simdType, id); return id; } @@ -31125,7 +31126,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, id = isScalar ? 
NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; } #elif defined(TARGET_ARM64) - if (Compiler::UseSveForVectorT()) + if (Compiler::UseSveForType(simdType)) { id = NI_Sve_CompareNotEqualTo; } @@ -31139,7 +31140,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } } - id = GetScalableHWIntrinsicId(simdSize, id); + id = GetScalableHWIntrinsicId(simdType, id); return id; } @@ -31220,7 +31221,7 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp( case GT_GT: case GT_LT: { - if (Compiler::UseSveForVectorT()) + if (Compiler::UseSveForType(type)) { lookupType = TYP_MASK; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 19137026d2106b..e1c8f70f8b2b6a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6589,7 +6589,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); - static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id); + //static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id); + static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id); static NamedIntrinsic GetHWIntrinsicIdForUnOp( Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 17e619e0f43047..0f03720cf4b234 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1042,7 +1042,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble; - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(simdSize, intrinsic); + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, intrinsic); op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); @@ -1672,7 +1672,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_CreateSequence: { - assert(Compiler::UseSveForVectorT()); + assert(Compiler::UseSveForType(retType)); op2 = impPopStack().val; op1 = impPopStack().val; @@ -1682,7 +1682,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_ToScalar: { - if (UseSveForVectorT()) + if (UseSveForType(retType)) { op1 = impSIMDPopStack(); From a5889f6aa9f28110d6798d971b68f7f51526ff23 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 May 2025 10:11:36 -0700 Subject: [PATCH 082/120] Remove constraint of vector being only 16 bytes --- src/coreclr/vm/codeman.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 0df03a93cb9aba..82c33d453b0ac4 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1520,10 +1520,7 @@ void EEJitManager::SetCpuInfo() uint32_t maxVectorTLength = (maxVectorTBitWidth / 8); uint64_t sveLengthFromOS = GetSveLengthFromOS(); - // For now, enable SVE only when the system vector length is 16 bytes (128-bits) - // TODO: https://github.com/dotnet/runtime/issues/101477 - if (sveLengthFromOS == 16) - // if ((maxVectorTLength >= sveLengthFromOS) || (maxVectorTBitWidth == 0)) + if ((maxVectorTLength >= sveLengthFromOS) || (maxVectorTBitWidth == 0)) { CPUCompileFlags.Set(InstructionSet_Sve); From f97a198b956f1877592f8f6d9a9ebc65a09d3f4f Mon Sep 17 00:00:00 2001 
From: Kunal Pathak Date: Wed, 28 May 2025 10:27:26 -0700 Subject: [PATCH 083/120] TEMP: Enable SVE for 16B as well --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/vm/methodtablebuilder.cpp | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index aca92911e46530..e13a3f11232226 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2615,7 +2615,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) { // In test mode, if UseSveForVectorT=1, then mimic that // we are generating for VL > 16B - compVectorTLength = 32; + compVectorTLength = 16; //32; } else #endif // DEBUG diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 96351fd4f48fbf..a5d55a7272725a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8946,7 +8946,7 @@ class Compiler } FORCEINLINE static bool UseSveForType(var_types type) { - return UseSveForVectorT() && ((type == TYP_SIMD32) || (type == TYP_SIMD64)); + return UseSveForVectorT() && varTypeIsSIMDOrMask(type); // ((type == TYP_SIMD32) || (type == TYP_SIMD64)); } FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize) diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index efb7f772f4c668..68a1a4ebfd0dbd 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1208,14 +1208,14 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() #elif defined(TARGET_ARM64) if (CPUCompileFlags.IsSet(InstructionSet_Sve_Arm64)) { -#ifdef _DEBUG - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseSveForVectorT) != 0) - { - // For testing purpose, pretend the vector length is 32 bytes - numInstanceFieldBytes = 32; - } - else -#endif +//#ifdef _DEBUG +// if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseSveForVectorT) != 0) +// { +// // For testing purpose, pretend the vector length is 32 bytes +// numInstanceFieldBytes = 32; +// } +// else +//#endif { numInstanceFieldBytes = (uint32_t)GetSveLengthFromOS(); } From 897f4743a8ba7dfb201b11c1bfe841c069e839b4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 May 2025 14:57:41 -0700 Subject: [PATCH 084/120] fix bugs for using TYP_SIMD16 for SVE --- src/coreclr/jit/codegenarm64.cpp | 4 +- src/coreclr/jit/compiler.cpp | 63 ++++++++++++++++++++++++++++---- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/emitarm64sve.cpp | 6 +-- src/coreclr/jit/instr.cpp | 3 +- 5 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5c263549aa696b..d6439642a54bda 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3041,7 +3041,9 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } } emitAttr attr = emitActualTypeSize(targetType); - if (attr == EA_SCALABLE) + bool isScalable = (attr == EA_SCALABLE) || (Compiler::UseSveForType(targetType)); + + if (isScalable) { // TODO-VL: Should we check the baseType or it doesn't matter because it is just reg->reg move GetEmitter()->emitIns_Mov(INS_sve_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired, diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e13a3f11232226..7db6daf53d26b1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2609,20 +2609,69 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #if defined(TARGET_ARM64) +/* +* 
#ifdef DEBUG +* if (matched) +* { +* compVectorTLength = getTargetLength(); +* compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); +* compUseSveForVectorT |= JitConfig.UseSveForVectorT(); +* } +* else +* { +* compVectorTLength = 16; +* compUseSveForVectorT = JitConfig.UseSveForVectorT(); +* } +* #else +* if (matched) +* { +* compVectorTLength = getTargetLength(); +* compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); +* } +* else +* { +* compVectorTLength = 0; +* compUseSveForVectorT = false; +* } +* #endif +* +*/ + + if (info.compMatchedVM) + { + compVectorTLength = info.compCompHnd->getTargetVectorLength(); + compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); #ifdef DEBUG - compUseSveForVectorT = JitConfig.UseSveForVectorT(); - if (compUseSveForVectorT) + compUseSveForVectorT |= (bool)JitConfig.UseSveForVectorT(); +#endif // DEBUG + } + else { + // altjit +#ifdef DEBUG + compUseSveForVectorT = JitConfig.UseSveForVectorT(); // In test mode, if UseSveForVectorT=1, then mimic that // we are generating for VL > 16B compVectorTLength = 16; //32; - } - else +#else + compVectorTLength = 0; #endif // DEBUG - { - compVectorTLength = info.compCompHnd->getTargetVectorLength(); - compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); } + +//#ifdef DEBUG +// compUseSveForVectorT = JitConfig.UseSveForVectorT(); +// if (compUseSveForVectorT) +// { +// // In test mode, if UseSveForVectorT=1, then mimic that +// // we are generating for VL > 16B +// compVectorTLength = 16; //32; +// } +// else +//#endif // DEBUG +// { +// compVectorTLength = info.compCompHnd->getTargetVectorLength(); +// compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); +// } #endif // TARGET_ARM64 bool enableInliningMethodsWithEH = JitConfig.JitInlineMethodsWithEH() > 0; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0ad38540a7f850..12f1fff362da19 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1424,7 +1424,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) con } else if (isVectorRegister(reg)) { - if (size == EA_16BYTE) + if ((size == EA_16BYTE) && !Compiler::UseSveForVectorT()) { rn = qRegNames[reg - REG_V0]; } diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 9a82f92f47b18a..fea84ad4d4ed38 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2663,7 +2663,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, case INS_sve_ldr: assert(insOptsNone(opt)); - assert(isScalableVectorSize(size)); + assert(isScalableVectorSize(size) || (size == EA_16BYTE)); assert(isGeneralRegister(reg2)); // nnnnn assert(isValidSimm<9>(imm)); // iii // iiiiii @@ -2703,7 +2703,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, case INS_sve_str: assert(insOptsNone(opt)); - assert(isScalableVectorSize(size)); + assert(isScalableVectorSize(size) || (size == EA_16BYTE)); assert(isGeneralRegister(reg2)); // nnnnn assert(isValidSimm<9>(imm)); // iii // iiiiii @@ -14228,7 +14228,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id) case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register 
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 0ad38540a7f850..12f1fff362da19 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1424,7 +1424,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) con
     }
     else if (isVectorRegister(reg))
     {
-        if (size == EA_16BYTE)
+        if ((size == EA_16BYTE) && !Compiler::UseSveForVectorT())
         {
             rn = qRegNames[reg - REG_V0];
         }
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index 9a82f92f47b18a..fea84ad4d4ed38 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -2663,7 +2663,7 @@ void emitter::emitInsSve_R_R_I(instruction ins,
         case INS_sve_ldr:
             assert(insOptsNone(opt));
-            assert(isScalableVectorSize(size));
+            assert(isScalableVectorSize(size) || (size == EA_16BYTE));
             assert(isGeneralRegister(reg2)); // nnnnn
             assert(isValidSimm<9>(imm));     // iii
                                              // iiiiii
@@ -2703,7 +2703,7 @@ void emitter::emitInsSve_R_R_I(instruction ins,
         case INS_sve_str:
             assert(insOptsNone(opt));
-            assert(isScalableVectorSize(size));
+            assert(isScalableVectorSize(size) || (size == EA_16BYTE));
             assert(isGeneralRegister(reg2)); // nnnnn
             assert(isValidSimm<9>(imm));     // iii
                                              // iiiiii
@@ -14228,7 +14228,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
         case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
         case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
             assert(insOptsNone(id->idInsOpt()));
-            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isScalableVectorSize(id->idOpSize()) || (id->idOpSize() == EA_16BYTE));
             assert(isVectorRegister(id->idReg1()));      // ttttt
             assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn
             assert(isValidSimm<9>(emitGetInsSC(id)));    // iii
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index cd0cbac7694f4f..bb38f9920bd1b4 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -575,8 +575,9 @@ void CodeGen::inst_Mov(var_types dstType,
 #ifdef TARGET_ARM
     GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags);
 #elif defined(TARGET_ARM64)
+    bool isScalable = (size == EA_SCALABLE) || (Compiler::UseSveForType(dstType));
     GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip,
-                              size == EA_SCALABLE ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE);
+                              isScalable ? INS_OPTS_SCALABLE_B : INS_OPTS_NONE);
 #else
     GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip);
 #endif

From 63a31fb4621454abda7b830442948b0d37b39a10 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 29 May 2025 10:31:39 -0700
Subject: [PATCH 085/120] fix bug for str/ldr using reserved register

---
 src/coreclr/jit/emitarm64sve.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index fea84ad4d4ed38..c761829845e7e2 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -21,6 +21,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 /*****************************************************************************/

 #include "instr.h"
+#include "codegen.h"

 /*****************************************************************************/
@@ -2682,11 +2683,10 @@ void emitter::emitInsSve_R_R_I(instruction ins,
                 }
                 else
                 {
-                    // Otherwise, create the address first and then
-                    // use it in str
-                    // add reg2, reg2, imm
-                    // str zn, [reg2]
-                    emitIns_R_R_I(INS_add, EA_8BYTE, reg2, reg2, imm);
+                    regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                    // For larger imm values (> 9 bits), calculate base + imm in a reserved register first.
+                    codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm);
+                    reg2 = rsvdReg;
                     imm = 0;
                 }
             }
@@ -2722,11 +2722,10 @@ void emitter::emitInsSve_R_R_I(instruction ins,
                 }
                 else
                 {
-                    // Otherwise, create the address first and then
-                    // use it in str
-                    // add reg2, reg2, imm
-                    // str zn, [reg2]
-                    emitIns_R_R_I(INS_add, EA_8BYTE, reg2, reg2, imm);
+                    regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                    // For larger imm values (> 9 bits), calculate base + imm in a reserved register first.
+                    codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm);
+                    reg2 = rsvdReg;
                     imm = 0;
                 }
             }
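
The bug being fixed: the old sequence clobbered the incoming base register with `add reg2, reg2, imm`, whereas the reserved register keeps `reg2` intact when the offset does not fit the instruction's signed 9-bit immediate. A small model of the range check behind this path (mirroring the `isValidSimm<9>` asserts above; standalone, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Signed 9-bit immediate range used by SVE LDR/STR (vector): [-256, 255],
    // counted in vector-length multiples.
    static bool isValidSimm9(int64_t imm)
    {
        return (-256 <= imm) && (imm <= 255);
    }

    int main()
    {
        assert(isValidSimm9(255) && isValidSimm9(-256)); // fits: encode directly
        assert(!isValidSimm9(256));                      // does not fit: form base + imm
                                                         // in a scratch register first
        return 0;
    }
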
From 05cfde4ec69a0352ce2a1f953efd905859976125 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Mon, 9 Jun 2025 17:16:04 -0700
Subject: [PATCH 086/120] Support to generate SVE for 16B too - use isScalable

---
 src/coreclr/jit/codegenarm64.cpp          |  13 +-
 src/coreclr/jit/codegencommon.cpp         |   4 +-
 src/coreclr/jit/compiler.h                |  63 ++--
 src/coreclr/jit/emitarm64.cpp             |   4 +-
 src/coreclr/jit/emitarm64sve.cpp          |  16 +-
 src/coreclr/jit/gentree.cpp               | 294 ++++++++--------
 src/coreclr/jit/gentree.h                 |  12 +-
 src/coreclr/jit/hwintrinsic.cpp           |   2 +-
 src/coreclr/jit/hwintrinsicarm64.cpp      | 392 ++++++++++++++++++----
 src/coreclr/jit/hwintrinsiclistarm64sve.h |  17 +-
 src/coreclr/jit/importervectorization.cpp |   4 +-
 src/coreclr/jit/instr.cpp                 |  10 +-
 src/coreclr/jit/lclmorph.cpp              |   4 +-
 src/coreclr/jit/lower.cpp                 |   4 +-
 src/coreclr/jit/lsra.cpp                  |   8 +-
 src/coreclr/jit/lsraarm64.cpp             |   1 +
 src/coreclr/jit/lsrabuild.cpp             |   2 +-
 src/coreclr/jit/morph.cpp                 |  18 +-
 src/coreclr/jit/vartype.h                 |  15 +
 19 files changed, 627 insertions(+), 256 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index d6439642a54bda..cd007510136295 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2280,6 +2280,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
     {
         // We ignore any differences between SIMD12 and SIMD16 here if we can broadcast the value
         // via mvni/movi.
+        // Also, even if UseSveForVectorT == true, we keep loading such constants into V* registers
+        // instead of Z* registers, because their sizes are the same when VL == 16.
+
         const bool is8 = tree->TypeIs(TYP_SIMD8);

         if (vecCon->IsAllBitsSet())
@@ -2298,12 +2301,12 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
             emit->emitIns_R_I(INS_movi, attr, targetReg, val.i32[0], is8 ? INS_OPTS_2S : INS_OPTS_4S);
         }
         else if (ElementsAreSame(val.i16, is8 ? 4 : 8) &&
-            emitter::emitIns_valid_imm_for_movi(val.i16[0], EA_2BYTE))
+                 emitter::emitIns_valid_imm_for_movi(val.i16[0], EA_2BYTE))
         {
             emit->emitIns_R_I(INS_movi, attr, targetReg, val.i16[0], is8 ? INS_OPTS_4H : INS_OPTS_8H);
         }
         else if (ElementsAreSame(val.i8, is8 ? 8 : 16) &&
-            emitter::emitIns_valid_imm_for_movi(val.i8[0], EA_1BYTE))
+                 emitter::emitIns_valid_imm_for_movi(val.i8[0], EA_1BYTE))
         {
             emit->emitIns_R_I(INS_movi, attr, targetReg, val.i8[0], is8 ? INS_OPTS_8B : INS_OPTS_16B);
         }
@@ -5354,7 +5357,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)
     regNumber tgtReg = node->GetRegNum();

 #ifdef TARGET_ARM64
     // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc.
-    if (Compiler::UseSveForType(op1->TypeGet()))
+    if (Compiler::UseStrictSveForType(op1->TypeGet()))
     {
         // Until we have a custom ABI for SVE, we will just store the entire contents of Z* registers
         // on the stack. If we don't do it, we will need multiple free registers to save the
@@ -5420,7 +5423,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
     assert((varSize == 16) || (Compiler::SizeMatchesVectorTLength(varSize)));

     regNumber srcReg = node->GetRegNum();
-    assert((srcReg != REG_NA) || (Compiler::UseSveForType(node->TypeGet())));
+    assert((srcReg != REG_NA) || (Compiler::UseStrictSveForType(node->TypeGet())));
     regNumber lclVarReg = genConsumeReg(lclNode);
     assert(lclVarReg != REG_NA);
@@ -5434,7 +5437,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)

 #ifdef TARGET_ARM64
     // TODO-VL: Write a helper to do this check for LclVars*, GenTree*, etc.
-    if (Compiler::UseSveForType(op1->TypeGet()))
+    if (Compiler::UseStrictSveForType(op1->TypeGet()))
     {
         // Until we have a custom ABI for SVE, we will just store the entire contents of Z* registers
         // on the stack. If we don't do it, we will need multiple free registers to save the
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index 44b48b0d6fd6bd..9d1f336e5516e9 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -3226,7 +3226,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
                 instruction ins = ins_Copy(node->reg, copyType);
 #ifdef TARGET_ARM64
-                insOpts opts = Compiler::UseSveForType(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
+                insOpts opts = Compiler::UseStrictSveForType(copyType) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
                 GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(copyType), node->copiedReg, node->reg,
                                           /* canSkip */ false, opts);
 #else
@@ -3251,7 +3251,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
             regNumber   sourceReg = edge->from->copiedReg != REG_NA ? edge->from->copiedReg : edge->from->reg;
             instruction ins       = ins_Copy(sourceReg, genActualType(edge->type));
 #ifdef TARGET_ARM64
-            insOpts opts = Compiler::UseSveForType(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
+            insOpts opts = Compiler::UseStrictSveForType(edge->type) ? INS_OPTS_SCALABLE_D : INS_OPTS_NONE;
             GetEmitter()->emitIns_Mov(ins, emitActualTypeSize(edge->type), node->reg, sourceReg,
                                       /* canSkip */ true, opts);
 #else
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index a5d55a7272725a..8896cd5ffdddca 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3154,7 +3154,9 @@ class Compiler
                                  GenTree*    op1,
                                  GenTree*    op2,
                                  CorInfoType simdBaseJitType,
-                                 unsigned    simdSize);
+                                 unsigned    simdSize
+                                 ARM64_ARG(bool isScalable)
+                                 );

     GenTree* gtNewSimdCeilNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -3165,28 +3167,36 @@ class Compiler
                                  GenTree*    op2,
                                  CorInfoType simdBaseJitType,
                                  unsigned    simdSize
-                                 ARM64_ARG(bool wrapInCvtm = true));
+                                 ARM64_ARG(bool isScalable)
+                                 ARM64_ARG(bool wrapInCmtv = true)
+                                 );

     GenTree* gtNewSimdCmpOpAllNode(genTreeOps  op,
                                    var_types   type,
                                    GenTree*    op1,
                                    GenTree*    op2,
                                    CorInfoType simdBaseJitType,
-                                   unsigned    simdSize);
+                                   unsigned    simdSize
+                                   ARM64_ARG(bool isScalable)
+                                   );

     GenTree* gtNewSimdCmpOpAnyNode(genTreeOps  op,
                                    var_types   type,
                                    GenTree*    op1,
                                    GenTree*    op2,
                                    CorInfoType simdBaseJitType,
-                                   unsigned    simdSize);
+                                   unsigned    simdSize
+                                   ARM64_ARG(bool isScalable)
+                                   );

     GenTree* gtNewSimdCndSelNode(var_types   type,
                                  GenTree*    op1,
                                  GenTree*    op2,
                                  GenTree*    op3,
                                  CorInfoType simdBaseJitType,
-                                 unsigned    simdSize);
+                                 unsigned    simdSize
+                                 ARM64_ARG(bool isScalable)
+                                 );

 #if defined(FEATURE_MASKED_HW_INTRINSICS)
     GenTree* gtNewSimdCvtMaskToVectorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -3240,7 +3250,8 @@ class Compiler
                                      GenTree*    op1,
                                      GenTree*    op2,
                                      CorInfoType simdBaseJitType,
-                                     unsigned    simdSize);
+                                     unsigned    simdSize
+                                     ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdGetIndicesNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -3277,12 +3288,14 @@ class Compiler
     GenTree* gtNewSimdIsNaNNode(var_types   type,
                                 GenTree*    op1,
                                 CorInfoType simdBaseJitType,
-                                unsigned    simdSize);
+                                unsigned    simdSize
+                                ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdIsNegativeNode(var_types   type,
                                      GenTree*    op1,
                                      CorInfoType simdBaseJitType,
-                                     unsigned    simdSize);
+                                     unsigned    simdSize
+                                     ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdIsNegativeInfinityNode(var_types   type,
                                              GenTree*    op1,
@@ -3302,12 +3315,14 @@ class Compiler
     GenTree* gtNewSimdIsPositiveNode(var_types   type,
                                      GenTree*    op1,
                                      CorInfoType simdBaseJitType,
-                                     unsigned    simdSize);
+                                     unsigned    simdSize
+                                     ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdIsPositiveInfinityNode(var_types   type,
                                              GenTree*    op1,
                                              CorInfoType simdBaseJitType,
-                                             unsigned    simdSize);
+                                             unsigned    simdSize
+                                             ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdIsSubnormalNode(var_types   type,
                                       GenTree*    op1,
@@ -3317,7 +3332,8 @@ class Compiler
     GenTree* gtNewSimdIsZeroNode(var_types   type,
                                  GenTree*    op1,
                                  CorInfoType simdBaseJitType,
-                                 unsigned    simdSize);
+                                 unsigned    simdSize
+                                 ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdLoadNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -3332,25 +3348,29 @@ class Compiler
                               GenTree*    op1,
                               GenTree*    op2,
                               CorInfoType simdBaseJitType,
-                              unsigned    simdSize);
+                              unsigned    simdSize
+                              ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdMaxNativeNode(var_types   type,
                                     GenTree*    op1,
                                     GenTree*    op2,
                                     CorInfoType simdBaseJitType,
-                                    unsigned    simdSize);
+                                    unsigned    simdSize
+                                    ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdMinNode(var_types   type,
                               GenTree*    op1,
                               GenTree*    op2,
                               CorInfoType simdBaseJitType,
-                              unsigned    simdSize);
+                              unsigned    simdSize
+                              ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdMinNativeNode(var_types   type,
                                     GenTree*    op1,
                                     GenTree*    op2,
                                     CorInfoType simdBaseJitType,
-                                    unsigned    simdSize);
+                                    unsigned    simdSize
+                                    ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdNarrowNode(var_types   type,
                                  GenTree*    op1,
@@ -3413,7 +3433,7 @@ class Compiler
         var_types type, GenTree* op1, CorInfoType simdBaseJitType,
-        unsigned simdSize);
+        unsigned simdSize ARM64_ARG(bool isScalable));

     GenTree* gtNewSimdWidenLowerNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -6709,7 +6729,7 @@ class Compiler
     GenTree* fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp);
 #if defined(FEATURE_HW_INTRINSICS)
     GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
-    GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
+    GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool isScalable));
     GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
 #if defined(FEATURE_MASKED_HW_INTRINSICS)
     GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node);
@@ -8946,9 +8966,14 @@ class Compiler
     }
     FORCEINLINE static bool UseSveForType(var_types type)
     {
-        return UseSveForVectorT() && varTypeIsSIMDOrMask(type); // ((type == TYP_SIMD32) || (type == TYP_SIMD64));
+        return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && (type != TYP_SIMD8); // ((type == TYP_SIMD32) || (type == TYP_SIMD64));
+    }
+    FORCEINLINE static bool UseStrictSveForType(var_types type)
+    {
+        // This method is used in scenarios where we do not know the type of the HIR node or how the LIR node was formed.
+        // For such cases, we generate SVE only if we are guaranteed to have VL >= 32B.
+        return UseSveForType(type) && (type != TYP_SIMD16);
     }
-
     FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize)
     {
         return simdSize == compVectorTLength;
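
Taken together (and assuming UseSveForVectorT() returns true), the two predicates above split the SIMD types into three buckets; a compact standalone model (enum values are abbreviated stand-ins, not the real var_types):

    #include <cassert>

    enum SimdKind { Simd8, Simd16, Mask }; // stand-ins for TYP_SIMD8, TYP_SIMD16, TYP_MASK

    static bool useSveForType(SimdKind t)       { return t != Simd8; }                      // 8-byte SIMD is always NEON
    static bool useStrictSveForType(SimdKind t) { return useSveForType(t) && (t != Simd16); } // 16-byte SIMD only when VL > 16B is guaranteed

    int main()
    {
        assert(!useSveForType(Simd8));        // TYP_SIMD8: always NEON
        assert(useSveForType(Simd16));        // TYP_SIMD16: SVE allowed in general
        assert(!useStrictSveForType(Simd16)); // ...but not where VL might be only 16B
        assert(useStrictSveForType(Mask));    // masks: always SVE
        return 0;
    }
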
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 12f1fff362da19..deef2de0b9b675 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1418,13 +1418,13 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) con
     {
         rn = xRegNames[reg];
     }
-    else if (size == EA_4BYTE)
+    else if ((size == EA_4BYTE))
     {
         rn = wRegNames[reg];
     }
     else if (isVectorRegister(reg))
     {
-        if ((size == EA_16BYTE) && !Compiler::UseSveForVectorT())
+        if ((size == EA_16BYTE))
         {
             rn = qRegNames[reg - REG_V0];
         }
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index c761829845e7e2..04d64fde5fc4db 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -2207,10 +2207,19 @@ void emitter::emitInsSve_R_R(instruction ins,
             {
                 assert(size == EA_8BYTE);
             }
-            else
+            else if (opt == INS_OPTS_SCALABLE_S)
             {
                 assert(size == EA_4BYTE);
             }
+            else if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(size == EA_2BYTE);
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(size == EA_1BYTE);
+            }
 #endif // DEBUG
             reg2 = encodingSPtoZR(reg2);
             fmt  = IF_SVE_CB_2A;
@@ -16254,9 +16263,12 @@ void emitter::emitDispInsSveHelp(instrDesc* id)

         // <Zd>.<T>, <R><n|SP>
         case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register
+        {
             emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
+            emitAttr gprSize = (size == EA_8BYTE) ? size : EA_4BYTE;
+            emitDispReg(encodingZRtoSP(id->idReg2()), gprSize, false);
             break;
+        }

         // <Zd>.H, <Zn>.B
         case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index ae2f02b43f97a8..9d782158fa7a92 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20911,7 +20911,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
             intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_AbsScalar : NI_AdvSimd_Arm64_Abs;
         }

-        intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+        //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
         assert(intrinsic != NI_Illegal);

         return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
@@ -20921,7 +20921,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
 }

 GenTree* Compiler::gtNewSimdBinOpNode(
-    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -21014,7 +21014,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                     op2 = gtNewOperNode(GT_NEG, TYP_INT, op2);
                 }

-                if (UseSveForType(type))
+                if (UseSveForType(type) && isScalable)
                 {
                     op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType,
                                                    simdSize);
@@ -21118,7 +21118,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
     }

     NamedIntrinsic intrinsic =
-        GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, op, op1, op2ForLookup, simdBaseType, simdSize, false);
+        GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, op, op1, op2ForLookup, simdBaseType, simdSize, false ARM64_ARG(isScalable));

     if (intrinsic != NI_Illegal)
     {
@@ -21146,8 +21146,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(
             // and produce overall better codegen.
             assert(fgNodeThreading != NodeThreading::LIR);

-            op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize);
-            return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize);
+            op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize, isScalable);
+            return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize, isScalable);
         }

 #if defined(TARGET_XARCH)
@@ -21507,7 +21507,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                 return gtNewSimdBinOpNode(GT_ADD, type, low, mid, simdBaseJitType, simdSize);
             }
 #elif defined(TARGET_ARM64)
-            if (varTypeIsLong(simdBaseType))
+            if (isScalable)
+            {
+                return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_Multiply, simdBaseJitType, simdSize);
+            }
+            else if (varTypeIsLong(simdBaseType))
             {
                 GenTree** op2ToDup = nullptr;
@@ -21541,11 +21545,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                 GenTree* op2Dup = fgMakeMultiUse(op2ToDup);

                 assert(!varTypeIsArithmetic(op1Dup));
-                op1Dup = gtNewSimdGetElementNode(TYP_LONG, op1Dup, gtNewIconNode(1), simdBaseJitType, simdSize);
+                op1Dup = gtNewSimdGetElementNode(TYP_LONG, op1Dup, gtNewIconNode(1), simdBaseJitType, simdSize ARM64_ARG(isScalable));

                 if (!varTypeIsArithmetic(op2Dup))
                 {
-                    op2Dup = gtNewSimdGetElementNode(TYP_LONG, op2Dup, gtNewIconNode(1), simdBaseJitType, simdSize);
+                    op2Dup = gtNewSimdGetElementNode(TYP_LONG, op2Dup, gtNewIconNode(1), simdBaseJitType, simdSize ARM64_ARG(isScalable));
                 }

                 // upper = op1.GetElement(1) * op2.GetElement(1)
@@ -21610,7 +21614,7 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
@@ -22037,7 +22041,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type,
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, hwIntrinsicID);
+    //hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, hwIntrinsicID);
     assert(hwIntrinsicID != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize);
 }
@@ -22081,7 +22085,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
                                       GenTree*    op1,
                                       GenTree*    op2,
                                       CorInfoType simdBaseJitType,
-                                      unsigned    simdSize ARM64_ARG(bool wrapInCvtm))
+                                      unsigned    simdSize ARM64_ARG(bool isScalable) ARM64_ARG(bool wrapInCmtv))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -22097,20 +22101,21 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));

-    var_types lookupType = GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op, type, simdBaseType, simdSize);
+    var_types lookupType = GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op, type, simdBaseType, simdSize ARM64_ARG(isScalable));
     NamedIntrinsic intrinsic =
-        GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, op, lookupType, op1, op2, simdBaseType, simdSize, false);
+        GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, op, lookupType, op1, op2, simdBaseType, simdSize, false ARM64_ARG(isScalable));

     if (intrinsic != NI_Illegal)
     {
 #if defined(FEATURE_MASKED_HW_INTRINSICS)
-        bool wrapCallInConvertVectorToMask = (lookupType != type);
+        bool wrapCallInConvertMaskToVector = (lookupType != type);
 #if defined(TARGET_ARM64)
-        wrapCallInConvertVectorToMask &= wrapInCvtm;
+        wrapCallInConvertMaskToVector &= isScalable;
+        wrapCallInConvertMaskToVector &= wrapInCmtv;
 #endif

-        if (wrapCallInConvertVectorToMask)
+        if (wrapCallInConvertMaskToVector)
         {
             assert(varTypeIsMask(lookupType));
             GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -22371,8 +22376,8 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
             assert(!canUseEvexEncodingDebugOnly());
 #endif // TARGET_XARCH

-            GenTree* result = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize);
-            return gtNewSimdUnOpNode(GT_NOT, type, result, simdBaseJitType, simdSize);
+            GenTree* result = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize, isScalable);
+            return gtNewSimdUnOpNode(GT_NOT, type, result, simdBaseJitType, simdSize, isScalable);
         }

         default:
@@ -22383,7 +22388,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
 }

 GenTree* Compiler::gtNewSimdCmpOpAllNode(
-    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(type == TYP_INT);
@@ -22471,21 +22476,13 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
 #elif defined(TARGET_ARM64)
         case GT_EQ:
         {
-            if (simdSize == 8)
-            {
-                intrinsic = NI_Vector64_op_Equality;
-            }
-            else if (simdSize == 16)
-            {
-                intrinsic = NI_Vector128_op_Equality;
-            }
-            else
+            if (UseSveForType(simdType) && isScalable)
             {
-                assert(UseSveForType(simdType));
+                assert(UseSveForType(simdType) && isScalable);
                 intrinsic = NI_Vector_op_Equality;

                 GenTree* cmpResult =
-                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ true, /* wrapInCmtv */ false);

                 // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, with `1` for lanes for which `a CC b`
                 // is true. For the `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL`
@@ -22504,9 +22501,13 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
                 // However, the NOT() operation only operates on the "byte" variant i.e. `p1.B`, while the result of `p1` from
                 // `SVE_CMP_CC` can be of other variants like `p1.S` or `p1.D`, etc.
                 GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
-                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
-                                               simdBaseJitType, simdSize);
-                op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+                op1              = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                                            simdBaseJitType, simdSize);
+                op2              = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+            }
+            else
+            {
+                intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
             }
             break;
         }
@@ -22519,20 +22520,12 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
         case GT_GE:
         case GT_GT:
        case GT_LE:
         case GT_LT:
         {
             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet

-            if (simdSize == 8)
-            {
-                intrinsic = NI_Vector64_op_Equality;
-            }
-            else
-            {
-                intrinsic = NI_Vector128_op_Equality;
-            }

-            if (UseSveForType(simdType))
+            if (UseSveForType(simdType) && isScalable)
             {
                 intrinsic = NI_Vector_op_Equality;

                 GenTree* cmpResult =
-                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize /* isScalable */ ARM64_ARG(true) /* wrapInCmtv */ ARM64_ARG(false));

                 // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, with `1` for lanes for which `a CC b`
                 // is true. For the `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL`
@@ -22557,7 +22550,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
             }
             else
             {
-                op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
+                intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
+                op1       = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, isScalable);
                 op2 = gtNewAllBitsSetConNode(simdType);
             }
@@ -22583,14 +22577,14 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
         }
     }

-    intrinsic = GetScalableHWIntrinsicId(simdType, intrinsic);
+    //intrinsic = GetScalableHWIntrinsicId(simdType, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
 }
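
Both reductions here lean on the same CNTP trick described in the comments above. A scalar model of the two checks (standalone sketch, names hypothetical, not JIT code):

    #include <cassert>
    #include <cstddef>

    // Models CNTP: count the active (true) lanes of a predicate.
    static size_t countActive(const bool* mask, size_t lanes)
    {
        size_t n = 0;
        for (size_t i = 0; i < lanes; i++)
        {
            n += mask[i] ? 1 : 0;
        }
        return n;
    }

    static bool allLanes(const bool* mask, size_t lanes) { return countActive(mask, lanes) == lanes; } // All: r1 == VL
    static bool anyLane(const bool* mask, size_t lanes)  { return countActive(mask, lanes) != 0; }     // Any: r1 != 0

    int main()
    {
        bool m[4] = {true, true, false, true};
        assert(!allLanes(m, 4));
        assert(anyLane(m, 4));
        return 0;
    }
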
 GenTree* Compiler::gtNewSimdCmpOpAnyNode(
-    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(type == TYP_INT);
@@ -22687,12 +22681,12 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero

-            intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
-
-            if (UseSveForType(simdType))
+            if (UseSveForType(simdType) && isScalable)
             {
+                intrinsic = NI_Vector_op_Inequality;
+
                 GenTree* cmpResult =
-                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ true, /* wrapInCmtv */ false);

                 // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, with `1` for lanes for which `a CC b`
                 // is true. For the `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME`
                 //
                 // if r1 != 0 return true else false
                 GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
-                op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
-                                               simdBaseJitType, simdSize);
+                op1              = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount,
+                                                            simdBaseJitType, simdSize);
                 op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
             }
             else
             {
-                op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize);
+                intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
+                op1       = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ false, /* wrapInCmtv */ false);
                 op2 = gtNewZeroConNode(simdType);
             }
@@ -22728,22 +22723,12 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(

         case GT_NE:
         {
-            if (simdSize == 8)
+            if (UseSveForType(simdType) && isScalable)
             {
-                intrinsic = NI_Vector64_op_Inequality;
-            }
-            else if (simdSize == 16)
-            {
-                intrinsic = NI_Vector128_op_Inequality;
-            }
-            else
-            {
-                assert(UseSveForType(simdType));
-
                 intrinsic = NI_Vector_op_Inequality;

                 GenTree* cmpResult =
-                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+                    gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ true, /* wrapInCmtv */ false);

                 // The operation `p1 = SVE_CMP_CC(a, b)` returns a predicate mask, with `1` for lanes for which `a CC b`
                 // is true. For the `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME`
@@ -22758,6 +22743,10 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
                 op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
             }
+            else
+            {
+                intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
+            }
             break;
         }
 #else
@@ -22770,14 +22759,14 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
         }
     }

-    intrinsic = GetScalableHWIntrinsicId(simdType, intrinsic);
+    //intrinsic = GetScalableHWIntrinsicId(simdType, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
 }

 GenTree* Compiler::gtNewSimdCndSelNode(
-    var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -22815,7 +22804,7 @@ GenTree* Compiler::gtNewSimdCndSelNode(
     }
     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
 #elif defined(TARGET_ARM64)
-    if (UseSveForType(type))
+    if (UseSveForType(type) && isScalable)
     {
         intrinsic = NI_Sve_ConditionalSelect;
         op1       = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize);
@@ -22825,7 +22814,6 @@ GenTree* Compiler::gtNewSimdCndSelNode(
         intrinsic = NI_AdvSimd_BitwiseSelect;
     }

-    intrinsic = UseSveForType(type) ? NI_Sve_ConditionalSelect : NI_AdvSimd_BitwiseSelect;
     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
 #else
 #error Unsupported platform
@@ -23378,13 +23366,13 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode(
     else
     {
         GenTree* indices = gtNewSimdGetIndicesNode(type, simdBaseJitType, simdSize);
-        result           = gtNewSimdBinOpNode(GT_MUL, type, indices, op2, simdBaseJitType, simdSize);
+        result           = gtNewSimdBinOpNode(GT_MUL, type, indices, op2, simdBaseJitType, simdSize ARM64_ARG(false));
     }

     if (isPartial)
     {
         GenTree* start = gtNewSimdCreateBroadcastNode(type, op1, simdBaseJitType, simdSize);
-        result         = gtNewSimdBinOpNode(GT_ADD, type, result, start, simdBaseJitType, simdSize);
+        result         = gtNewSimdBinOpNode(GT_ADD, type, result, start, simdBaseJitType, simdSize ARM64_ARG(false));
     }

     return result;
@@ -23479,7 +23467,7 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
@@ -23536,14 +23524,14 @@ GenTree* Compiler::gtNewSimdFmaNode(
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
 }

 GenTree* Compiler::gtNewSimdGetElementNode(
-    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     NamedIntrinsic intrinsicId  = NI_Vector128_GetElement;
     var_types      simdBaseType = JitType2PreciseVarType(simdBaseJitType);
@@ -23621,6 +23609,16 @@ GenTree* Compiler::gtNewSimdGetElementNode(
         op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound);
     }

+#if defined(TARGET_ARM64)
+    if (isScalable)
+    {
+        var_types op1Type = op1->TypeGet();
+        op1 =
+            gtNewSimdHWIntrinsicNode(op1Type, op1, op2, NI_Sve_DuplicateSelectedScalarToVector, simdBaseJitType, simdSize);
+        return gtNewSimdToScalarNode(type, op1, simdBaseJitType, 16);
+    }
+#endif
+
     return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsicId, simdBaseJitType, simdSize);
 }
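
A hypothetical scalar model of the SVE GetElement lowering added above: DUP (DuplicateSelectedScalarToVector) replicates lane [idx] into every lane, after which ToScalar simply reads lane 0.

    #include <cassert>

    static int getElementViaDup(const int* vec, unsigned laneCount, unsigned idx)
    {
        int lane0 = vec[idx % laneCount]; // after DUP, every lane (incl. lane 0) holds vec[idx]
        return lane0;                     // ToScalar: read lane 0
    }

    int main()
    {
        int v[8] = {10, 11, 12, 13, 14, 15, 16, 17};
        assert(getElementViaDup(v, 8, 3) == 13);
        return 0;
    }
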
@@ -23801,8 +23799,8 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type,
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsIntegral(simdBaseType));

-    op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize);
-    return gtNewSimdIsZeroNode(type, op1, simdBaseJitType, simdSize);
+    op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize, false);
+    return gtNewSimdIsZeroNode(type, op1, simdBaseJitType, simdSize ARM64_ARG(false));
 }

 //----------------------------------------------------------------------------------------------
@@ -23850,8 +23848,9 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy
         }

         cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize);

-        op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseJitType, simdSize);
-        return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+        assert(varTypeIsNeonSIMD(type));
+        op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseJitType, simdSize, false);
+        return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false);
     }

     assert(varTypeIsIntegral(simdBaseType));
@@ -23886,7 +23885,7 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfo
     if (varTypeIsFloating(simdBaseType))
     {
         op1 = gtNewSimdAbsNode(type, op1, simdBaseJitType, simdSize);
-        return gtNewSimdIsPositiveInfinityNode(type, op1, simdBaseJitType, simdSize);
+        return gtNewSimdIsPositiveInfinityNode(type, op1, simdBaseJitType, simdSize ARM64_ARG(false));
     }
     return gtNewZeroConNode(type);
 }
@@ -23915,6 +23914,7 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
+    assert(varTypeIsNeonSIMD(type));

     if (varTypeIsFloating(simdBaseType))
     {
@@ -23924,9 +23924,9 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT
         op1     = gtNewSimdIsFiniteNode(type, op1, simdBaseJitType, simdSize);
         op1Dup1 = gtNewSimdTruncNode(type, op1Dup1, simdBaseJitType, simdSize);

-        GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseJitType, simdSize);
+        GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseJitType, simdSize, false);

-        return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize);
+        return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize, false);
     }

     assert(varTypeIsIntegral(simdBaseType));
@@ -23945,7 +23945,7 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT
 // Returns:
 //    The created IsNaN node
 //
-GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -23961,7 +23961,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType
     if (varTypeIsFloating(simdBaseType))
     {
         GenTree* op1Dup = fgMakeMultiUse(&op1);
-        return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize);
+        return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize, isScalable);
     }
     return gtNewZeroConNode(type);
 }
@@ -23978,7 +23978,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType
 // Returns:
 //    The created IsNegative node
 //
-GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24004,7 +24004,7 @@ GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfo
     {
         return gtNewZeroConNode(type);
     }
-    return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+    return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, isScalable);
 }

 //----------------------------------------------------------------------------------------------
@@ -24055,7 +24055,7 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type,
         }

         cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize);
-        return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize);
+        return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize, false);
     }
     return gtNewZeroConNode(type);
 }
@@ -24084,6 +24084,7 @@ GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoTy
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
+    assert(varTypeIsNeonSIMD(type));

     if (varTypeIsFloating(simdBaseType))
     {
@@ -24114,12 +24115,12 @@ GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoTy
         cnsNode1 = gtNewSimdCreateBroadcastNode(type, cnsNode1, simdBaseJitType, simdSize);
         cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseJitType, simdSize);

-        op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize);
-        return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize);
+        op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize, false);
+        return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize, false);
     }

     assert(varTypeIsIntegral(simdBaseType));
-    return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+    return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false);
 }

 //----------------------------------------------------------------------------------------------
@@ -24143,6 +24144,7 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type,
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
+    assert(varTypeIsNeonSIMD(type));

     assert(op1 != nullptr);
     assert(op1->TypeIs(type));
@@ -24150,8 +24152,8 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type,
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsIntegral(simdBaseType));

-    op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize);
-    return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+    op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize, false);
+    return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false);
 }

 //----------------------------------------------------------------------------------------------
@@ -24166,7 +24168,7 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type,
 // Returns:
 //    The created IsPositive node
 //
-GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24192,7 +24194,7 @@ GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfo
     {
         return gtNewAllBitsSetConNode(type);
     }
-    return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+    return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 //----------------------------------------------------------------------------------------------
@@ -24210,7 +24212,8 @@ GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfo
 GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types   type,
                                                    GenTree*    op1,
                                                    CorInfoType simdBaseJitType,
-                                                   unsigned    simdSize)
+                                                   unsigned    simdSize
+                                                   ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24243,7 +24246,7 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type,
         }

         cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize);
-        return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize);
+        return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize ARM64_ARG(isScalable));
     }
     return gtNewZeroConNode(type);
 }
@@ -24303,9 +24306,9 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type,
         cnsNode1 = gtNewOneConNode(type, simdBaseType);
         cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseJitType, simdSize);

-        op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize);
+        op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize ARM64_ARG(false));

-        return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize);
+        return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize ARM64_ARG(false));
     }
     return gtNewZeroConNode(type);
 }
@@ -24322,7 +24325,7 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type,
 // Returns:
 //    The created IsZero node
 //
-GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
+GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24335,7 +24338,7 @@ GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType
     var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));

-    return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize);
+    return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 //----------------------------------------------------------------------------------------------
@@ -24525,7 +24528,7 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type,
 }

 GenTree* Compiler::gtNewSimdMaxNode(
-    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24571,11 +24574,11 @@ GenTree* Compiler::gtNewSimdMaxNode(
     }
 #endif // TARGET_XARCH

-    return gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize);
+    return gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 GenTree* Compiler::gtNewSimdMaxNativeNode(
-    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24776,7 +24779,7 @@ GenTree* Compiler::gtNewSimdMaxNativeNode(
     if (intrinsic != NI_Illegal)
     {
-        intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+        //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }
@@ -24788,11 +24791,11 @@ GenTree* Compiler::gtNewSimdMaxNativeNode(
     op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));

     // result = ConditionalSelect(op1, op1Dup, op2Dup)
-    return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
+    return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 GenTree* Compiler::gtNewSimdMinNode(
-    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -24838,11 +24841,11 @@ GenTree* Compiler::gtNewSimdMinNode(
     }
 #endif // TARGET_XARCH

-    return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);
+    return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 GenTree* Compiler::gtNewSimdMinNativeNode(
-    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
+    var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -25039,7 +25042,7 @@ GenTree* Compiler::gtNewSimdMinNativeNode(
     if (intrinsic != NI_Illegal)
     {
-        intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+        //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }
@@ -25048,10 +25051,10 @@ GenTree* Compiler::gtNewSimdMinNativeNode(
     GenTree* op2Dup = fgMakeMultiUse(&op2);

     // op1 = op1 < op2
-    op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false));
+    op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable));

     // result = ConditionalSelect(op1, op1Dup, op2Dup)
-    return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
+    return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable));
 }

 GenTree* Compiler::gtNewSimdNarrowNode(
@@ -25621,7 +25624,7 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
@@ -26160,7 +26163,7 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode(
             cnsNode                      = gtNewVconNode(type);
             cnsNode->AsVecCon()->gtSimdVal = orCns;

-            op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize);
+            op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize ARM64_ARG(false));
         }

         retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize);
@@ -26249,8 +26252,8 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode(
         assert(genTypeSize(JitType2PreciseVarType(corType)) == elementSize);

         // create the mask node (op2 < comparand), and the result node (mask & nativeResult)
-        GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize);
-        retNode       = gtNewSimdBinOpNode(GT_AND, type, retNode, mask, simdBaseJitType, simdSize);
+        GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize ARM64_ARG(false));
+        retNode       = gtNewSimdBinOpNode(GT_AND, type, retNode, mask, simdBaseJitType, simdSize ARM64_ARG(false));
     }
     else
     {
@@ -27481,14 +27484,14 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }

 GenTree* Compiler::gtNewSimdUnOpNode(
-    genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
+    genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     assert(IsBaselineSimdIsaSupportedDebugOnly());
@@ -27543,7 +27546,7 @@ GenTree* Compiler::gtNewSimdUnOpNode(
 #endif // TARGET_ARM64

     NamedIntrinsic intrinsic =
-        GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(this, op, op1, simdBaseType, simdSize, false);
+        GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(this, op, op1, simdBaseType, simdSize, false ARM64_ARG(isScalable));

     if (intrinsic != NI_Illegal)
     {
@@ -27785,7 +27788,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
         intrinsic = NI_AdvSimd_ZeroExtendWideningLower;
     }

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);

     tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsic, simdBaseJitType, 8);
@@ -27999,7 +28002,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo
         intrinsic = NI_AdvSimd_ZeroExtendWideningUpper;
     }

-    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
+    //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic);
     assert(intrinsic != NI_Illegal);
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
@@ -29727,10 +29730,14 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
 // GetScalableHWIntrinsicId: Returns SVE equivalent of given intrinsic ID, if applicable
 //
 //NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id)
-NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id)
+NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, var_types simdBaseType, NamedIntrinsic id)
 {
     NamedIntrinsic sveId = id;
-
+    if ((id == NI_Illegal) || ((FIRST_NI_Sve <= sveId) && (sveId <= LAST_NI_Sve)))
+    {
+        return sveId;
+    }
+
 #ifdef TARGET_ARM64
     // TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places
@@ -29816,7 +29823,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
             case NI_AdvSimd_Arm64_Multiply:
                 sveId = NI_Sve_Multiply;
                 break;
+            case NI_AdvSimd_MultiplyByScalar:
+            case NI_AdvSimd_Arm64_MultiplyByScalar:
+                sveId = NI_Sve_MultiplyByScalar;
+                break;
             case NI_AdvSimd_Negate:
+            case NI_AdvSimd_Arm64_Negate:
                 sveId = NI_Sve_Negate;
                 break;
             case NI_AdvSimd_Not:
@@ -29871,7 +29883,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
                 sveId = NI_Sve_Xor;
                 break;
             default:
-                sveId = id;
+                assert(!"Did not find matching AdvSimd -> Sve");
+                break;
         }
     }
     // Make sure if we are using VL SIMD, we are not generating AdvSimd/NEON intrinsics
@@ -29896,7 +29909,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
 //    The intrinsic ID based on the oper, base type, and simd size
 //
 NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
-    Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar)
+    Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar ARM64_ARG(bool isScalable))
 {
     var_types simdType = comp->getSIMDTypeForSize(simdSize);
@@ -29974,7 +29987,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
         }
     }

-    id = GetScalableHWIntrinsicId(simdType, id);
+#if defined(TARGET_ARM64)
+    if (isScalable && Compiler::UseSveForType(simdType))
+    {
+        id = GetScalableHWIntrinsicId(simdType, simdBaseType, id);
+    }
+#endif
     return id;
 }
@@ -30000,7 +30018,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler*  comp,
                                                             GenTree*   op2,
                                                             var_types  simdBaseType,
                                                             unsigned   simdSize,
-                                                            bool       isScalar)
+                                                            bool       isScalar
+                                                            ARM64_ARG(bool isScalable))
 {
     var_types simdType = comp->getSIMDTypeForSize(simdSize);
@@ -30766,7 +30785,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
         }
     }

-    id = GetScalableHWIntrinsicId(simdType, id);
+    // simdType can be `TYP_SIMD16` for three cases:
+    // - We originally had Vector128, then we should retain AdvSimd
+    // - We originally had Vector<T>, and UseSve=0, then we should retain AdvSimd
+    // - We originally had Vector<T>, and UseSve=1, then we should convert to Sve
+    if (isScalable && Compiler::UseSveForType(simdType))
+    {
+        id = GetScalableHWIntrinsicId(simdType, simdBaseType, id);
+    }
     return id;
 }
@@ -30794,7 +30820,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler*  comp,
                                                             GenTree*   op2,
                                                             var_types  simdBaseType,
                                                             unsigned   simdSize,
-                                                            bool       isScalar)
+                                                            bool       isScalar
+                                                            ARM64_ARG(bool isScalable))
 {
     var_types simdType = comp->getSIMDTypeForSize(simdSize);
     assert(varTypeIsMask(type) || (type == simdType));
@@ -31126,7 +31153,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
                 id = isScalar ? NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual;
             }
 #elif defined(TARGET_ARM64)
-            if (Compiler::UseSveForType(simdType))
+            if (Compiler::UseSveForType(simdType) && isScalable)
             {
                 id = NI_Sve_CompareNotEqualTo;
             }
@@ -31140,7 +31167,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
         }
     }

-    id = GetScalableHWIntrinsicId(simdType, id);
+    if (Compiler::UseSveForType(simdType) && isScalable)
+    {
+        id = GetScalableHWIntrinsicId(simdType, simdBaseType, id);
+    }
     return id;
 }
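
The id remapping is now gated identically at all three lookup sites: only when the node is scalable and the type qualifies for SVE is an AdvSimd id rewritten, and GetScalableHWIntrinsicId itself now early-outs on ids that are already SVE. A compact model of that contract (enum values hypothetical):

    #include <cassert>

    enum Id { Illegal, AdvSimd_Add, FirstSve, Sve_Add, Sve_Multiply, LastSve };

    // Hypothetical model of GetScalableHWIntrinsicId: illegal ids and ids already
    // in the SVE range pass through untouched; only AdvSimd ids are remapped.
    static Id getScalableId(Id id)
    {
        if ((id == Illegal) || ((FirstSve <= id) && (id <= LastSve)))
        {
            return id;
        }
        return (id == AdvSimd_Add) ? Sve_Add : id; // the real code uses a large switch
    }

    int main()
    {
        assert(getScalableId(Sve_Add) == Sve_Add);     // already SVE: unchanged
        assert(getScalableId(AdvSimd_Add) == Sve_Add); // AdvSimd: remapped
        assert(getScalableId(Illegal) == Illegal);
        return 0;
    }
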
@@ -31163,7 +31193,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
 //    may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK.
 //
 var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
-    Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize)
+    Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize ARM64_ARG(bool isScalable))
 {
     var_types simdType = comp->getSIMDTypeForSize(simdSize);
     assert(varTypeIsMask(type) || (type == simdType));
@@ -31221,7 +31251,7 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
         case GT_GT:
         case GT_LT:
         {
-            if (Compiler::UseSveForType(type))
+            if (Compiler::UseSveForType(type) && isScalable)
             {
                 lookupType = TYP_MASK;
             }
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index e1c8f70f8b2b6a..56aebe688b3936 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -6590,10 +6590,10 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
     static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);

     //static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id);
-    static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, NamedIntrinsic id);
+    static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, var_types simdBaseType, NamedIntrinsic id);

     static NamedIntrinsic GetHWIntrinsicIdForUnOp(
-        Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar);
+        Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar ARM64_ARG(bool isScalable));

     static NamedIntrinsic GetHWIntrinsicIdForBinOp(Compiler*  comp,
                                                    genTreeOps oper,
@@ -6601,7 +6601,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
                                                    GenTree*   op2,
                                                    var_types  simdBaseType,
                                                    unsigned   simdSize,
-                                                   bool       isScalar);
+                                                   bool       isScalar
+                                                   ARM64_ARG(bool isScalable));

     static NamedIntrinsic GetHWIntrinsicIdForCmpOp(Compiler*  comp,
                                                    genTreeOps oper,
@@ -6610,10 +6611,11 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
                                                    GenTree*   op2,
                                                    var_types  simdBaseType,
                                                    unsigned   simdSize,
-                                                   bool       isScalar);
+                                                   bool       isScalar
+                                                   ARM64_ARG(bool isScalable));

     static var_types GetLookupTypeForCmpOp(
-        Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize);
+        Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize ARM64_ARG(bool isScalable));

     static genTreeOps GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar);
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index f1f05eb652d405..c2cf938edba251 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -1628,7 +1628,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitTyp
     assert((isa == InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128));
 #endif // TARGET_XARCH
 #ifdef TARGET_ARM64
-    assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128));
+    assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128) || (isa == InstructionSet_Vector));
 #endif // TARGET_ARM64
 #endif // DEBUG
     return false;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 0f03720cf4b234..ecc777fbf03751 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -725,6 +725,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             op1     = impSIMDPopStack();
             retNode = gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize);
+            //if (intrinsic == NI_Vector_Abs)
+            //{
+            //    intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId());
+            //    retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic);
+            //}
             break;
         }
@@ -733,12 +738,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         case NI_Vector64_op_Addition:
         case NI_Vector128_op_Addition:
         {
+            bool isScalable = (intrinsic == NI_Vector_Add) || (intrinsic == NI_Vector_op_Addition);
             assert(sig->numArgs == 2);

             op2 = impSIMDPopStack();
             op1 = impSIMDPopStack();

-            retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize);
+            retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize, isScalable);
             break;
         }
@@ -752,7 +758,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

             if (varTypeIsFloating(simdBaseType))
             {
-                retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize);
+                retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize, false);
             }
             else
             {
@@ -773,6 +779,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         case NI_Vector64_AndNot:
         case NI_Vector128_AndNot:
         {
+            bool isScalable = intrinsic == NI_Vector_AndNot;
             assert(sig->numArgs == 2);

             // We don't want to support creating AND_NOT nodes prior to LIR
            //
             op2 = impSIMDPopStack();
             op1 = impSIMDPopStack();

-            op2     = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseJitType, simdSize));
-            retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize);
+            GenTree* notNode = gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseJitType, simdSize, isScalable);
+            op2              = gtFoldExpr(notNode);
+            retNode          = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize, isScalable);
             break;
         }
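
For reference, the AndNot expansion above builds NOT(op2) and then AND, the same scalar identity on both the NEON and SVE paths; a trivial standalone check:

    #include <cassert>
    #include <cstdint>

    // AndNot(x, y) == x & ~y, the identity the importer expansion relies on.
    static uint32_t andNot(uint32_t x, uint32_t y)
    {
        return x & ~y;
    }

    int main()
    {
        assert(andNot(0xF0F0u, 0xFF00u) == 0x00F0u);
        assert(andNot(0xFFFFu, 0x0000u) == 0xFFFFu);
        return 0;
    }
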
GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize); + retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize, intrinsic == NI_Vector_ConditionalSelect); break; } @@ -1042,7 +1055,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble; - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, intrinsic); + //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, intrinsic); op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); @@ -1069,6 +1082,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_ConvertToInt32) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1092,6 +1110,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_ConvertToInt64) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1104,6 +1127,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_ConvertToSingle) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1127,6 +1155,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); + //if ((intrinsic == NI_Vector_ConvertToUInt32Native) || (intrinsic == NI_Vector_ConvertToUInt32)) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1150,6 +1183,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); + //if ((intrinsic == NI_Vector_ConvertToUInt64Native) || (intrinsic == NI_Vector_ConvertToUInt64)) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1404,10 +1442,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass); - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_DIV, retType, 
op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Division); break; } + case NI_Vector_Dot: // TODO-VL : Fix DOT to use SVE case NI_Vector64_Dot: case NI_Vector128_Dot: { @@ -1421,7 +1460,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseJitType, simdSize); - retNode = gtNewSimdGetElementNode(retType, retNode, gtNewIconNode(0), simdBaseJitType, simdSize); + retNode = gtNewSimdGetElementNode(retType, retNode, gtNewIconNode(0), simdBaseJitType, simdSize, false); } break; } @@ -1435,7 +1474,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Equals); break; } @@ -1448,7 +1487,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Equality); + //if (intrinsic == NI_Vector_op_Equality) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1461,7 +1505,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_EqualsAny); + //if (intrinsic == NI_Vector_EqualsAny) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1646,6 +1695,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdFloorNode(retType, op1, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_Floor) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1667,6 +1721,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_FusedMultiplyAdd) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1682,13 +1741,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_ToScalar: { - if (UseSveForType(retType)) - { - op1 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - // Even for SVE, to scalar always would fetch 0th element from the overlapping SIMD register. - retNode = gtNewSimdToScalarNode(genActualType(simdBaseType), op1, simdBaseJitType, 16); - } + // Even for SVE, to scalar always would fetch 0th element from the overlapping SIMD register. 
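
Note on the pattern running through the cases above: nearly every NI_Vector_* case now threads an extra isScalable argument into the shared gtNewSimd*Node factories, computed as intrinsic == NI_Vector_<Name>, so a single import path serves both the fixed-width Vector64/128 intrinsics (false) and the scalable Vector<T> ones (true). The guard dropped from the ToScalar case leans on an architectural fact: the low 128 bits of each SVE Z register alias the corresponding NEON V register, so fetching element 0 is the same operation for either register file. Below is a minimal standalone model of the dispatch the factories are being asked to perform; the enum values and function name are illustrative, not the real JIT API.

    #include <cassert>
    #include <cstdio>

    // Stand-ins for a few generated intrinsic ids.
    enum NamedIntrinsic { NI_AdvSimd_Add, NI_Sve_Add, NI_AdvSimd_And, NI_Sve_And };
    enum genTreeOps { GT_ADD, GT_AND };

    NamedIntrinsic pickBinOpIntrinsic(genTreeOps oper, bool isScalable)
    {
        switch (oper)
        {
            // Vector<T> on an SVE machine takes the scalable form;
            // Vector64/128 keep the fixed-width AdvSimd form.
            case GT_ADD: return isScalable ? NI_Sve_Add : NI_AdvSimd_Add;
            case GT_AND: return isScalable ? NI_Sve_And : NI_AdvSimd_And;
        }
        assert(!"unexpected operator");
        return NI_AdvSimd_Add;
    }

    int main()
    {
        std::printf("%d %d\n", pickBinOpIntrinsic(GT_ADD, true),   // NI_Sve_Add
                               pickBinOpIntrinsic(GT_ADD, false)); // NI_AdvSimd_Add
        return 0;
    }

Keeping the selection inside the factories, rather than in each case, is also what lets the commented-out per-case GetScalableHWIntrinsicId blocks above be retired in favor of the consolidated switch that appears at the end of impSpecialIntrinsic further down.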
+ retNode = gtNewSimdToScalarNode(genActualType(simdBaseType), op1, simdBaseJitType, 16); break; } @@ -1734,6 +1790,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_GetElement: case NI_Vector64_GetElement: case NI_Vector128_GetElement: { @@ -1743,7 +1800,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GetElement); break; } @@ -1774,7 +1831,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThan); break; } @@ -1787,7 +1844,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanAll); + //if (intrinsic == NI_Vector_GreaterThanAll) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1800,7 +1862,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanAny); + //if (intrinsic == NI_Vector_GreaterThanAny) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1813,7 +1880,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqual); + //if (intrinsic == NI_Vector_GreaterThanOrEqual) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1826,7 +1898,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqualAll); + //if (intrinsic == NI_Vector_GreaterThanOrEqualAll) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1839,7 +1916,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - 
retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqualAny); + //if (intrinsic == NI_Vector_GreaterThanOrEqualAny) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -1893,7 +1975,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNaNNode(retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdIsNaNNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsNaN); break; } @@ -1903,7 +1985,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeNode(retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdIsNegativeNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsNegative); break; } @@ -1948,7 +2030,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsPositive); break; } @@ -1958,7 +2040,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsPositiveInfinity); break; } @@ -1977,7 +2059,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsZeroNode(retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdIsZeroNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsZero); break; } @@ -1990,7 +2072,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThan); + //if (intrinsic == NI_Vector_LessThan) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2003,7 +2090,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanAll); + //if (intrinsic == NI_Vector_LessThanAll) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2016,7 +2108,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, 
op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanAny); + //if (intrinsic == NI_Vector_LessThanAny) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2029,7 +2126,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqual); break; } @@ -2042,7 +2139,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqualAll); + //if (intrinsic == NI_Vector_LessThanOrEqualAll) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2055,7 +2157,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqualAny); + //if (intrinsic == NI_Vector_LessThanOrEqualAny) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2155,7 +2262,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMaxNode(retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdMaxNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Max); + //if (intrinsic == NI_Vector_Max) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2173,7 +2285,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MaxNative); + //if (intrinsic == NI_Vector_MaxNative) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2187,7 +2304,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMinNode(retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdMinNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Min); + //if (intrinsic == NI_Vector_Min) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, 
retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2205,7 +2327,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMinNativeNode(retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdMinNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MinNative); + //if (intrinsic == NI_Vector_MinNative) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2226,7 +2353,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass); - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Multiply); break; } @@ -2257,15 +2384,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_MultiplyAddEstimate) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} } else { - GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); - retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize); + GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MultiplyAddEstimate); + retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize, intrinsic == NI_Vector_MultiplyAddEstimate); } break; } + case NI_Vector_Narrow: case NI_Vector64_Narrow: case NI_Vector128_Narrow: { @@ -2311,12 +2444,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_op_UnaryNegation: case NI_Vector64_op_UnaryNegation: case NI_Vector128_op_UnaryNegation: { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_UnaryNegation); break; } @@ -2326,7 +2460,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); + retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_OnesComplement); + //if (intrinsic == NI_Vector_op_OnesComplement) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2339,7 +2478,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == 
NI_Vector_op_Inequality); + //if (intrinsic == NI_Vector_op_Inequality) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2361,7 +2505,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Subtraction); break; } @@ -2373,7 +2517,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize, true); retNode->AsHWIntrinsic()->SetAuxiliaryJitType(simdBaseJitType); break; } @@ -2385,7 +2529,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize, false); break; } @@ -2398,7 +2542,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize, true); retNode->AsHWIntrinsic()->SetAuxiliaryJitType(simdBaseJitType); break; } @@ -2412,7 +2556,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize, false); break; } @@ -2424,7 +2568,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize, false); break; } @@ -2445,6 +2589,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdRoundNode(retType, op1, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_Round) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2831,7 +2980,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize, false); } else { @@ -2871,6 +3020,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdTruncNode(retType, op1, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_Truncate) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2883,6 +3037,11 @@ 
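
The shift cases above come in near-identical pairs: the Vector<T> variant passes isScalable = true and then stashes simdBaseJitType via SetAuxiliaryJitType, while the fixed-width Vector64/128 variant passes false. A plausible reading, and it is only our assumption rather than anything the patch states, is that the auxiliary slot preserves the lane width for later rewriting, since the shift amount itself is a plain integer and the node's base type may be repurposed. A toy model of that bookkeeping, with hypothetical types in place of the real GenTreeHWIntrinsic:

    #include <cstdio>

    // Toy stand-ins; the real JIT types carry far more state.
    enum CorInfoType { CORINFO_TYPE_SHORT = 5, CORINFO_TYPE_INT = 7 };

    struct HWIntrinsicNode
    {
        bool        isScalable;
        CorInfoType simdBaseJitType;
        CorInfoType auxJitType;
        void SetAuxiliaryJitType(CorInfoType t) { auxJitType = t; }
    };

    unsigned laneBits(CorInfoType t) { return (t == CORINFO_TYPE_SHORT) ? 16 : 32; }

    int main()
    {
        // Vector<short> << n on an SVE machine: scalable node, lane type stashed.
        HWIntrinsicNode node{true, CORINFO_TYPE_SHORT, CORINFO_TYPE_INT};
        node.SetAuxiliaryJitType(node.simdBaseJitType);

        // The stashed type keeps the legal shift range recoverable later.
        std::printf("max shift = %u\n", laneBits(node.auxJitType) - 1); // 15
        return 0;
    }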
GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdWidenLowerNode(retType, op1, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_WidenLower) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2895,6 +3054,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdWidenUpperNode(retType, op1, simdBaseJitType, simdSize); + //if (intrinsic == NI_Vector_WidenUpper) + //{ + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + //} break; } @@ -2969,7 +3133,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); + retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_ExclusiveOr); break; } @@ -3504,6 +3668,120 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } +#ifdef TARGET_ARM64 + if ((retNode != nullptr) && (intrinsic >= FIRST_NI_Vector) && (intrinsic <= LAST_NI_Vector)) + { + // For VectorT, map the intrinsics + switch (intrinsic) + { + case NI_Vector_Abs: + case NI_Vector_Ceiling: + case NI_Vector_ConvertToDouble: + case NI_Vector_ConvertToInt32Native: + case NI_Vector_ConvertToInt32: + case NI_Vector_ConvertToInt64Native: + case NI_Vector_ConvertToInt64: + case NI_Vector_ConvertToSingle: + case NI_Vector_ConvertToUInt32Native: + case NI_Vector_ConvertToUInt32: + case NI_Vector_ConvertToUInt64Native: + case NI_Vector_ConvertToUInt64: + case NI_Vector_Floor: + case NI_Vector_FusedMultiplyAdd: + case NI_Vector_Max: + case NI_Vector_MaxNative: + case NI_Vector_Min: + case NI_Vector_MinNative: + case NI_Vector_MultiplyAddEstimate: + case NI_Vector_Round: + case NI_Vector_op_Subtraction: + case NI_Vector_Sum: + case NI_Vector_Truncate: + case NI_Vector_WidenLower: + case NI_Vector_WidenUpper: + { + intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + break; + } + case NI_Vector_Add: + case NI_Vector_op_Addition: + case NI_Vector_AndNot: + case NI_Vector_op_BitwiseAnd: + case NI_Vector_op_BitwiseOr: + case NI_Vector_op_Division: + case NI_Vector_op_Multiply: + case NI_Vector_op_ExclusiveOr: + { + // gtNewSimdBinOpNode should handle this + NamedIntrinsic sveIntrinsic = retNode->AsHWIntrinsic()->GetHWIntrinsicId(); + assert((FIRST_NI_Sve <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Sve)); + break; + } + case NI_Vector_ConditionalSelect: + case NI_Vector_Equals: + case NI_Vector_op_Equality: + case NI_Vector_EqualsAny: + case NI_Vector_GreaterThan: + case NI_Vector_GreaterThanAll: + case NI_Vector_GreaterThanAny: + case NI_Vector_GreaterThanOrEqual: + case NI_Vector_GreaterThanOrEqualAll: + case NI_Vector_GreaterThanOrEqualAny: + case NI_Vector_LessThan: + case NI_Vector_LessThanAll: + case NI_Vector_LessThanAny: + case NI_Vector_LessThanOrEqual: + case NI_Vector_LessThanOrEqualAll: + case NI_Vector_LessThanOrEqualAny: + case NI_Vector_op_Inequality: + { + // gtNewSimdCmpOpNode should handle this + NamedIntrinsic sveIntrinsic = 
retNode->AsHWIntrinsic()->GetHWIntrinsicId(); + assert(((FIRST_NI_Sve <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Sve)) || + ((FIRST_NI_Vector <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Vector)) || + (sveIntrinsic == NI_Sve_ConvertMaskToVector) || + (sveIntrinsic == NI_Sve_ConvertVectorToMask)); + break; + } + case NI_Vector_op_OnesComplement: + { + // gtNewSimdUnOpNode should handle this + break; + } + case NI_Vector_get_One: + case NI_Vector_get_Zero: + { + // This are constants + break; + } + case NI_Vector_As: + case NI_Vector_AsVectorByte: + case NI_Vector_AsVectorDouble: + case NI_Vector_AsVectorInt16: + case NI_Vector_AsVectorInt32: + case NI_Vector_AsVectorInt64: + case NI_Vector_AsVectorNInt: + case NI_Vector_AsVectorNUInt: + case NI_Vector_AsVectorSByte: + case NI_Vector_AsVectorSingle: + case NI_Vector_AsVectorUInt16: + case NI_Vector_AsVectorUInt32: + case NI_Vector_AsVectorUInt64: + case NI_Vector_get_Indices: + { + // no-op for these + break; + } + default: + { + //TODO-VL: Enable this + //unreached(); + break; + } + } + } +#endif assert(!isScalar || isValidScalarIntrinsic); return retNode; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 64217168f6db02..b74c46c1d31efc 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -50,11 +50,13 @@ HARDWARE_INTRINSIC(Vector, ConvertToUInt64Native, HARDWARE_INTRINSIC(Vector, Create, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector, CreateSequence, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Division, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Dot, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Equals, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, EqualsAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, EqualsAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, FusedMultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, 
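
The switch that closes impSpecialIntrinsic sorts the NI_Vector_* ids into groups: unary-math ids are remapped in place through GetScalableHWIntrinsicId; binary and bitwise ids are asserted to have already been resolved to SVE ids inside gtNewSimdBinOpNode; and the comparison ids additionally tolerate NI_Sve_ConvertMaskToVector / ConvertVectorToMask, because SVE compares produce predicate registers that have to be bridged back to vectors. The HARDWARE_INTRINSIC(Vector, ...) table rows that follow all carry INS_invalid in every lane plus HW_Flag_InvalidNodeId, which suggests they exist purely as import-time placeholders that must never survive to codegen. A standalone sketch of the range checks, where the FIRST_/LAST_ constants stand in for the table-generated ones:

    #include <cassert>

    // Illustrative layout only; real ids come from the intrinsic tables.
    enum NamedIntrinsic
    {
        FIRST_NI_Vector,
        NI_Vector_Abs   = FIRST_NI_Vector,
        NI_Vector_Sum,
        LAST_NI_Vector  = NI_Vector_Sum,
        FIRST_NI_Sve,
        NI_Sve_Abs      = FIRST_NI_Sve,
        NI_Sve_AddAcross,
        LAST_NI_Sve     = NI_Sve_AddAcross,
        NI_Sve_ConvertMaskToVector, // deliberately outside the [FIRST,LAST] Sve window here
    };

    bool isVectorId(NamedIntrinsic id) { return (FIRST_NI_Vector <= id) && (id <= LAST_NI_Vector); }
    bool isSveId(NamedIntrinsic id)    { return (FIRST_NI_Sve <= id) && (id <= LAST_NI_Sve); }

    int main()
    {
        // After import, a Vector<T> math node is expected to carry an SVE id;
        // a comparison may instead surface as a mask<->vector conversion.
        NamedIntrinsic afterImport = NI_Sve_Abs;
        assert(isSveId(afterImport) || isVectorId(afterImport) ||
               (afterImport == NI_Sve_ConvertMaskToVector));
        return 0;
    }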
GetElement, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Vector, GreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, GreaterThanAll, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, GreaterThanAny, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -83,6 +85,7 @@ HARDWARE_INTRINSIC(Vector, MinNative, HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, Narrow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -91,7 +94,7 @@ HARDWARE_INTRINSIC(Vector, ShiftRightLogical, HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Sum, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) @@ -99,7 +102,6 @@ HARDWARE_INTRINSIC(Vector, Truncate, HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -//HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_Indices, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_One, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -126,7 +128,7 @@ HARDWARE_INTRINSIC(Vector, op_UnaryPlus, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE Intrinsics #define FIRST_NI_Sve NI_Sve_Abs -HARDWARE_INTRINSIC(Sve, Abs, -1, -1, {INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Abs, -1, -1, {INS_sve_abs, INS_sve_abs, 
INS_sve_abs, INS_sve_abs, INS_sve_abs, INS_sve_abs, INS_sve_abs, INS_sve_abs, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThan, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facgt, INS_sve_facgt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThanOrEqual, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facge, INS_sve_facge}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThan, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_faclt, INS_sve_faclt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) @@ -137,7 +139,7 @@ HARDWARE_INTRINSIC(Sve, AddAcross, HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, {INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, {INS_sve_cnot, 
INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -332,12 +334,13 @@ HARDWARE_INTRINSIC(Sve, Multiply, HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, MultiplyByScalar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Negate, -1, -1, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, Not, -1, -1, {INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, Not, -1, -1, {INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) +HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) @@ -408,7 +411,7 @@ HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, VectorTableLookup, -1, 2, {INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, 
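
The table edits above widen Sve And, Or, Xor, Not (and Abs) from integer-only lanes to every lane type, so the scalable remap can always find an instruction for Vector<float>/Vector<double> operands. For the bitwise group this is semantically safe: AND/ORR/EOR/NOT operate on the raw lane bits and ignore the element interpretation, the same trick the fixed-width NEON tables rely on. A scalar model of one float lane:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Bitwise AND on a float lane, done on its raw bit pattern.
    float bitwiseAnd(float a, float b)
    {
        uint32_t ua, ub;
        std::memcpy(&ua, &a, sizeof(ua));
        std::memcpy(&ub, &b, sizeof(ub));
        uint32_t ur = ua & ub;
        float r;
        std::memcpy(&r, &ur, sizeof(r));
        return r;
    }

    int main()
    {
        // Clearing the sign bit of -2.0f with a 0x7FFFFFFF mask yields 2.0f.
        uint32_t maskBits = 0x7FFFFFFFu;
        float mask;
        std::memcpy(&mask, &maskBits, sizeof(mask));
        std::printf("%g\n", bitwiseAnd(-2.0f, mask)); // prints 2
        return 0;
    }

Whether filling the previously INS_invalid unsigned lanes of Sve Abs with INS_sve_abs is meant as more than a stopgap is not stated in the patch; for unsigned element types, managed Abs is the identity.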
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 0f9ac5643e9a92..8e65f6f7e7a2e2 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -98,13 +98,13 @@ GenTree* Compiler::impExpandHalfConstEquals( #ifdef FEATURE_HW_INTRINSICS if (varTypeIsSIMD(type)) { - return gtNewSimdBinOpNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(type)); + return gtNewSimdBinOpNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(type) ARM64_ARG(false)); } if (varTypeIsSIMD(op1)) { // E.g. a comparison of SIMD ops returning TYP_INT; assert(varTypeIsSIMD(op2)); - return gtNewSimdCmpOpAllNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(op1)); + return gtNewSimdCmpOpAllNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(op1) ARM64_ARG(false)); } #endif return gtNewOperNode(oper, type, op1, op2); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bb38f9920bd1b4..7369f65e3546bb 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -575,7 +575,7 @@ void CodeGen::inst_Mov(var_types dstType, #ifdef TARGET_ARM GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); #elif defined(TARGET_ARM64) - bool isScalable = (size == EA_SCALABLE) || (Compiler::UseSveForType(dstType)); + bool isScalable = (size == EA_SCALABLE) || (Compiler::UseStrictSveForType(dstType)); GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, isScalable ? INS_OPTS_SCALABLE_B : INS_OPTS_NONE); #else @@ -1930,7 +1930,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #endif } #ifdef TARGET_ARM64 - else if (Compiler::UseSveForType(srcType)) + else if (Compiler::UseStrictSveForType(srcType)) { return INS_sve_ldr; } @@ -2024,7 +2024,7 @@ instruction CodeGen::ins_Copy(var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (Compiler::UseSveForType(dstType)) + else if (Compiler::UseStrictSveForType(dstType)) { return INS_sve_mov; } @@ -2153,7 +2153,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) #endif } #ifdef TARGET_ARM64 - else if (Compiler::UseSveForType(dstType)) + else if (Compiler::UseStrictSveForType(dstType)) { return INS_sve_mov; } @@ -2272,7 +2272,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #endif } #ifdef TARGET_ARM64 - else if (Compiler::UseSveForType(dstType)) + else if (Compiler::UseStrictSveForType(dstType)) { return INS_sve_str; } diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 89d06d24505cbc..86e98789a400b7 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1699,7 +1699,7 @@ class LocalAddressVisitor final : public GenTreeVisitor // Handle case 1 or the float field of case 2 GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, - CORINFO_TYPE_FLOAT, genTypeSize(varDsc)); + CORINFO_TYPE_FLOAT, genTypeSize(varDsc) ARM64_ARG(false)); break; } @@ -1777,7 +1777,7 @@ class LocalAddressVisitor final : public GenTreeVisitor GenTree* indexNode1 = m_compiler->gtNewIconNode(3, TYP_INT); simdLclNode = m_compiler->gtNewSimdGetElementNode(TYP_FLOAT, simdLclNode, indexNode1, - CORINFO_TYPE_FLOAT, 16); + CORINFO_TYPE_FLOAT, 16 ARM64_ARG(false)); GenTree* 
indexNode2 = m_compiler->gtNewIconNode(3, TYP_INT); hwiNode = m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode2, simdLclNode, diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f8cf8c4777e43f..90ec282cb5ecd4 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2464,10 +2464,10 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) { assert(type == TYP_INT); return comp->gtNewSimdCmpOpAllNode(oper, TYP_INT, op1, op2, CORINFO_TYPE_NATIVEUINT, - genTypeSize(op1)); + genTypeSize(op1) ARM64_ARG(false)); } return comp->gtNewSimdBinOpNode(oper, op1->TypeGet(), op1, op2, CORINFO_TYPE_NATIVEUINT, - genTypeSize(op1)); + genTypeSize(op1) ARM64_ARG(false)); } #endif return comp->gtNewOperNode(oper, type, op1, op2); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 199b76a2c20da8..e82ec4c52ee1f3 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -6041,7 +6041,7 @@ void LinearScan::allocateRegisters() lclVarInterval->isPartiallySpilled = true; } #elif defined(TARGET_ARM64) - else if (Compiler::UseSveForType(lclVarInterval->registerType)) + else if (Compiler::UseStrictSveForType(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -6061,7 +6061,7 @@ void LinearScan::allocateRegisters() { lclVarInterval->isPartiallySpilled = false; #if defined(TARGET_ARM64) - if (Compiler::UseSveForType(lclVarInterval->registerType)) + if (Compiler::UseStrictSveForType(lclVarInterval->registerType)) { // TODO-VL: Need to do this for allocateRegistersMinimal too? allocate = false; @@ -7543,7 +7543,7 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, // while on x86 we can spill directly to memory. 
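
Two mechanical devices carry the shared-code hunks above and below. First, call sites in target-neutral files (importervectorization.cpp, lclmorph.cpp, lower.cpp) gain ARM64_ARG(false), which presumably expands to ", false" on arm64 and to nothing elsewhere, so other targets keep the old signatures; that expansion is inferred from usage, as the macro's definition is not shown in these hunks. Second, the codegen/LSRA queries switch from UseSveForType to UseStrictSveForType, which reads as narrowing the SVE ldr/str/mov and partial-spill paths to types that are genuinely scalable rather than any type SVE could service. A sketch of the macro pattern under those assumptions:

    #include <cstdio>

    // Assumed definition, mirroring how the call sites use it.
    #ifdef TARGET_ARM64
    #define ARM64_ARG(x) , x
    #else
    #define ARM64_ARG(x)
    #endif

    int newSimdBinOpNode(int oper, int simdSize
    #ifdef TARGET_ARM64
                         , bool isScalable
    #endif
    )
    {
        return oper + simdSize; // dummy "node" for the sketch
    }

    int main()
    {
        // The call site is spelled identically on every target.
        int node = newSimdBinOpNode(1, 16 ARM64_ARG(false));
        std::printf("%d\n", node);
        return 0;
    }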
regNumber spillReg = refPosition->assignedReg(); #ifdef TARGET_ARM64 - bool isVariableVL = Compiler::UseSveForType(varDsc->TypeGet()); + bool isVariableVL = Compiler::UseStrictSveForType(varDsc->TypeGet()); bool spillToMem = refPosition->spillAfter || isVariableVL; assert((spillReg != REG_NA) || isVariableVL); #else @@ -7646,7 +7646,7 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, simdUpperRestore->gtFlags |= GTF_NOREG_AT_USE; #else simdUpperRestore->gtFlags |= GTF_SPILLED; - assert((refPosition->assignedReg() != REG_NA) || (Compiler::UseSveForType(restoreLcl->TypeGet()))); + assert((refPosition->assignedReg() != REG_NA) || (Compiler::UseStrictSveForType(restoreLcl->TypeGet()))); restoreReg = refPosition->assignedReg(); #endif } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7124de55cfa266..0089b231f452d5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2181,6 +2181,7 @@ SingleTypeRegSet LinearScan::getOperandCandidates(GenTreeHWIntrinsic* intrinsicT case NI_Sve_MultiplyAddRotateComplexBySelectedScalar: isLowVectorOpNum = (opNum == 3); break; + case NI_Sve_MultiplyByScalar: case NI_Sve_MultiplyBySelectedScalar: isLowVectorOpNum = (opNum == 2); break; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 2deed987a8fd96..7b3cfed864d6db 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1484,7 +1484,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, #ifdef TARGET_XARCH forceRegOptional = true; #elif TARGET_ARM64 - forceRegOptional = Compiler::UseSveForType(tree->TypeGet()); + forceRegOptional = Compiler::UseStrictSveForType(tree->TypeGet()); #endif if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars)) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 20818c6f3915f3..cf8578e438e9c3 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9227,7 +9227,7 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp) // and if the call is one of these, attempt to optimize. // This is post-order, meaning that it will not morph the children. 
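
The LSRA changes above encode one concrete consequence of a variable vector length: the upper half of an SVE register has no fixed-width home, so insertUpperVectorSave forces a memory spill whenever the local is variable-VL (spillToMem = spillAfter || isVariableVL), and both the save and restore paths tolerate REG_NA as the assigned register. The lsraarm64.cpp hunk likewise folds the new NI_Sve_MultiplyByScalar into the low-vector operand constraint (opNum == 2), since SVE indexed FMUL encodings can only name a restricted low range of Z registers. A condensed model of the spill decision, with REG_NA modeled as -1:

    #include <cassert>
    #include <cstdio>

    struct RefPosition
    {
        bool spillAfter;
        int  assignedReg; // -1 models REG_NA
    };

    bool mustSpillToMemory(const RefPosition& ref, bool isVariableVL)
    {
        return ref.spillAfter || isVariableVL;
    }

    int main()
    {
        RefPosition ref{false, -1};
        bool isVariableVL = true; // i.e. Compiler::UseStrictSveForType(lclType)

        // Mirrors the diff's assert((spillReg != REG_NA) || isVariableVL):
        // an unassigned register is only acceptable for variable-VL locals.
        assert((ref.assignedReg != -1) || isVariableVL);

        std::printf("spill to mem: %d\n", mustSpillToMemory(ref, isVariableVL) ? 1 : 0);
        return 0;
    }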
//
-GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
+GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool isScalable))
 {
     assert(opts.OptimizationEnabled());
@@ -9472,7 +9472,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
         {
 #if defined(TARGET_ARM64)
             // xarch doesn't have a native GT_NEG representation for integers and itself uses (Zero - v1)
-            GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize);
+            GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize, isScalable);

             DEBUG_DESTROY_NODE(op1);
             DEBUG_DESTROY_NODE(node);
@@ -9529,7 +9529,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
         if (op2->IsVectorAllBitsSet())
         {
             // xarch doesn't have a native GT_NOT representation and itself uses (v1 ^ AllBitsSet)
-            GenTree* notNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize);
+            GenTree* notNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize, isScalable);

             DEBUG_DESTROY_NODE(op2);
             DEBUG_DESTROY_NODE(node);
@@ -9541,7 +9541,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
         if (varTypeIsFloating(simdBaseType) && op2->IsVectorNegativeZero(simdBaseType))
         {
             // xarch doesn't have a native GT_NEG representation for floating-point and itself uses (v1 ^ -0.0)
-            GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize);
+            GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize, isScalable);

             DEBUG_DESTROY_NODE(op2);
             DEBUG_DESTROY_NODE(node);
@@ -11006,6 +11006,9 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
     CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
     var_types simdBaseType = tree->GetSimdBaseType();
     unsigned simdSize = tree->GetSimdSize();
+#if defined(TARGET_ARM64)
+    bool isScalable = (((FIRST_NI_Vector <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Vector)) || ((FIRST_NI_Sve <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Sve)));
+#endif

     if (tree->isCommutativeHWIntrinsic())
     {
@@ -11034,13 +11037,13 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
         if (op1->IsCnsVec())
         {
             // Move constant vectors from op1 to op2 for comparison operations
             genTreeOps newOper = GenTree::SwapRelop(oper);
             var_types lookupType =
-                GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, simdSize);
+                GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, simdSize ARM64_ARG(isScalable));
             NamedIntrinsic newId =
                 GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, newOper, retType, op2, op1, simdBaseType,
-                                                             simdSize, false);
+                                                             simdSize, false ARM64_ARG(isScalable));

             if (newId != NI_Illegal)
             {
@@ -11076,7 +11079,7 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
             }
         }

-        morphedTree = fgOptimizeHWIntrinsic(tree);
+        morphedTree = fgOptimizeHWIntrinsic(tree ARM64_ARG(isScalable));
     }

     if (retType != morphedTree->TypeGet())
diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h
index e214d1f8a346ff..b60508ee5bcb44 100644
--- a/src/coreclr/jit/vartype.h
+++ b/src/coreclr/jit/vartype.h
@@ -75,6 +75,21 @@ inline bool varTypeIsSIMD(T vt)
 #endif
 }

+template <class T>
+inline bool varTypeIsNeonSIMD(T vt)
+{
+#ifdef FEATURE_SIMD
+    bool result = varTypeIsSIMD(vt);
+#ifdef TARGET_ARM64
+    result = result && ((vt == TYP_SIMD8) || (vt == TYP_SIMD16));
+#endif // TARGET_ARM64
+    return result;
+#else
+    // Always return false if FEATURE_SIMD is not enabled
+    return false;
+#endif
+}
+
 template <class T>
 inline bool varTypeIsMask(T vt)
 {

From a8020fa645a7b1dea933605b8ecc70a0b8e6d566 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Tue, 10 Jun 2025 22:56:09 -0700
Subject: [PATCH 087/120] Handle Multiply and MultiplyByScalar

---
 src/coreclr/jit/gentree.cpp | 52 ++++++++++++++++-----
 src/coreclr/jit/hwintrinsicarm64.cpp | 31 +++++++++++-
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 24 ++++++++++
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 ++--
 src/coreclr/jit/lowerarmarch.cpp | 5 ++
 5 files changed, 103 insertions(+), 17 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 9d782158fa7a92..dca5dafb30ef92 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -21049,7 +21049,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(
             broadcastOp = &op2;
         }

-        if (broadcastOp != nullptr)
+        if ((broadcastOp != nullptr) && !isScalable)
         {
 #if defined(TARGET_ARM64)
             if (varTypeIsLong(simdBaseType))
@@ -21509,19 +21509,38 @@ GenTree* Compiler::gtNewSimdBinOpNode(
 #elif defined(TARGET_ARM64)
             if (isScalable)
             {
+                if (!varTypeIsSIMD(op2))
+                {
+                    if (varTypeIsFloating(op2))
+                    {
+                        double op2Cns = 0.0;
+                        if (op2->IsCnsFltOrDbl())
+                        {
+                            op2Cns = op2->AsDblCon()->DconValue();
+                        }
+
+                        if ((op2Cns == 0.5) || (op2Cns == 2.0))
+                        {
+                            //GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
+                            return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_MultiplyByScalar, simdBaseJitType, simdSize);
+                        }
+                    }
+                    op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize);
+                }
                 return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_Multiply, simdBaseJitType, simdSize);
             }
             else if (varTypeIsLong(simdBaseType))
             {
+                assert(varTypeIsSIMD(op1));
+
                 GenTree** op2ToDup = nullptr;

-                assert(varTypeIsSIMD(op1));
-                op1 = gtNewSimdToScalarNode(TYP_LONG, op1, simdBaseJitType, simdSize);
+                op1 = gtNewSimdToScalarNode(TYP_LONG, op1, simdBaseJitType, simdSize);
                 GenTree** op1ToDup = &op1->AsHWIntrinsic()->Op(1);

                 if (varTypeIsSIMD(op2))
                 {
-                    op2 = gtNewSimdToScalarNode(TYP_LONG, op2, simdBaseJitType, simdSize);
+                    op2 = gtNewSimdToScalarNode(TYP_LONG, op2, simdBaseJitType, simdSize);
                     op2ToDup = &op2->AsHWIntrinsic()->Op(1);
                 }
@@ -29805,6 +29824,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
             sveId = NI_Sve_Divide;
             break;
         case NI_AdvSimd_Floor:
+        case NI_AdvSimd_Arm64_Floor:
             sveId = NI_Sve_RoundToNegativeInfinity;
             break;
         case NI_AdvSimd_FusedMultiplyAdd:
@@ -29842,6 +29862,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
             sveId = NI_Sve_RoundToNearest;
             break;
         case NI_AdvSimd_RoundToZero:
+        case NI_AdvSimd_Arm64_RoundToZero:
             sveId = NI_Sve_RoundToZero;
             break;
         case NI_AdvSimd_ShiftLogical:
@@ -30390,17 +30411,24 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
                 id = NI_SSE2_MultiplyLow;
             }
 #elif defined(TARGET_ARM64)
-            if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE)))
-            {
-                id = NI_AdvSimd_MultiplyScalar;
-            }
-            else if (simdBaseType == TYP_DOUBLE)
+            if (isScalable)
             {
-                id = op2->TypeIs(simdType) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Arm64_MultiplyByScalar;
+                id = varTypeIsSIMD(op2) ? NI_Sve_Multiply : NI_Illegal;
             }
-            else if (!varTypeIsLong(simdBaseType))
+            else
             {
-                id = op2->TypeIs(simdType) ? NI_AdvSimd_Multiply : NI_AdvSimd_MultiplyByScalar;
+                if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE)))
+                {
+                    id = NI_AdvSimd_MultiplyScalar;
+                }
+                else if (simdBaseType == TYP_DOUBLE)
+                {
+                    id = op2->TypeIs(simdType) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Arm64_MultiplyByScalar;
+                }
+                else if (!varTypeIsLong(simdBaseType))
+                {
+                    id = op2->TypeIs(simdType) ? NI_AdvSimd_Multiply : NI_AdvSimd_MultiplyByScalar;
+                }
             }
 #endif // !TARGET_XARCH && !TARGET_ARM64
             break;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index ecc777fbf03751..b99090ca1f6170 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1735,6 +1735,17 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             op2 = impPopStack().val;
             op1 = impPopStack().val;
+
+            // TODO-VL: There is no way to do floating point `initial` and `step` in SVE, corresponding
+            // to `Vector.CreateSequence()`. For now, just treat them as integral.
+            if (!varTypeIsIntegral(op1))
+            {
+                op1 = gtNewCastNode(TYP_LONG, op1, false, TYP_LONG);
+            }
+            if (!varTypeIsIntegral(op2))
+            {
+                op2 = gtNewCastNode(TYP_LONG, op2, false, TYP_LONG);
+            }
             retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize);
             break;
         }
@@ -2337,6 +2348,24 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         }

         case NI_Vector_op_Multiply:
+        {
+            assert(sig->numArgs == 2);
+
+            CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
+            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
+            var_types argType = TYP_UNKNOWN;
+            CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2 = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
+            op1 = getArgForHWIntrinsic(argType, argClass);
+
+            retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, true);
+            break;
+        }
+
         case NI_Vector64_op_Multiply:
         case NI_Vector128_op_Multiply:
         {
@@ -2353,7 +2382,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
             op1 = getArgForHWIntrinsic(argType, argClass);

-            retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Multiply);
+            retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, false);
             break;
         }
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index f4508b0dbc4ef3..cf82a78409e6c2 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -813,6 +813,20 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
                     break;

+                case NI_Sve_MultiplyByScalar:
+                {
+                    if (targetReg != embMaskOp1Reg)
+                    {
+                        GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, maskReg,
+                                                    embMaskOp1Reg, opt);
+                    }
+                    assert(intrinEmbMask.op2->IsCnsFltOrDbl());
+                    double imm = intrinEmbMask.op2->AsDblCon()->DconValue();
+                    assert((imm == 0.5) || (imm == 2.0));
+                    GetEmitter()->emitIns_R_R_F(insEmbMask, emitSize, targetReg, op1Reg, imm, opt);
+                    break;
+                }
+
                 default:
                     assert(targetReg != embMaskOp2Reg);

@@ -2739,6 +2753,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op3Reg, op1Reg, INS_OPTS_SCALABLE_D);
                     break;
+                case NI_Sve_MultiplyByScalar:
+                {
+                    if (targetReg != op2Reg)
+                    {
+                        GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op2Reg);
+                    }
+                    assert(intrin.op2->IsCnsFltOrDbl());
+                    unsigned imm = intrin.op2->AsDblCon()->DconValue() == 0.5 ? 0 : 1;
+                    GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op1Reg, imm, opt);
+                }
                 default:
                     unreached();
             }
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index b74c46c1d31efc..d56bf9c3b1e97d 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -82,19 +82,19 @@ HARDWARE_INTRINSIC(Vector, MaxNative,
 HARDWARE_INTRINSIC(Vector, MaxNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, MinNative, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
-HARDWARE_INTRINSIC(Vector, MinNumber, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, MinNumber, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Multiply, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, MultiplyAddEstimate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Narrow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, Negate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, OnesComplement, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
-HARDWARE_INTRINSIC(Vector, Round, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
+HARDWARE_INTRINSIC(Vector, Round, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, ShiftLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
 HARDWARE_INTRINSIC(Vector, 
ShiftRightLogical, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, SquareRoot, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAligned, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, StoreAlignedNonTemporal, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector, StoreUnsafe, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Subtract, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, Sum, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) @@ -334,7 +334,7 @@ HARDWARE_INTRINSIC(Sve, Multiply, HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, MultiplyByScalar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_LowVectorOperation) +HARDWARE_INTRINSIC(Sve, MultiplyByScalar, -1, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 74d57ac374ed83..870cb955db0cda 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4315,6 +4315,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } + case NI_Sve_MultiplyByScalar: + { + MakeSrcContained(node, intrin.op2); + break; + } default: unreached(); From f6e82cf25a7d55a7aa4b099e5708c12afde0d33c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 10 Jun 2025 23:41:05 -0700 Subject: [PATCH 088/120] REVERT: Enable SVE for VectorT (for testing) --- src/coreclr/inc/clrconfigvalues.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 7b99ad36eaa564..2ae92b03391cd5 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -285,7 +285,7 @@ CONFIG_DWORD_INFO(INTERNAL_GCUseGlobalAllocationContext, W("GCUseGlobalAllocatio /// CONFIG_DWORD_INFO(INTERNAL_JitBreakEmit, W("JitBreakEmit"), (DWORD)-1, "") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitDebuggable, W("JitDebuggable"), 0, "If set, suppress JIT optimizations that make debugging code difficult") -CONFIG_DWORD_INFO(INTERNAL_UseSveForVectorT, W("UseSveForVectorT"), 0, "Prefer SVE instructions for VectorT") +CONFIG_DWORD_INFO(INTERNAL_UseSveForVectorT, W("UseSveForVectorT"), 1, "Prefer SVE instructions for VectorT") #if !defined(DEBUG) && !defined(_DEBUG) #define INTERNAL_JitEnableNoWayAssert_Default 0 diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 18639ac91776f5..0d42798367f898 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -386,7 +386,7 @@ CONFIG_INTEGER(JitStressEvexEncoding, "JitStressEvexEncoding", 0) #endif #if defined(TARGET_ARM64) -CONFIG_INTEGER(UseSveForVectorT, "UseSveForVectorT", 0) // Prefer SVE instructions for VectorT +CONFIG_INTEGER(UseSveForVectorT, "UseSveForVectorT", 1) // Prefer SVE instructions for VectorT #endif // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h From 5df58bf0c68a2e4949b13b8e5eac5a691daecd71 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 11 Jun 2025 00:08:24 -0700 Subject: [PATCH 
089/120] merge conflict errors --- src/coreclr/jit/gentree.cpp | 12 ++++-------- src/coreclr/jit/hwintrinsic.cpp | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 1 - .../Common/JitInterface/CorInfoImpl_generated.cs | 11 ++++++----- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 55535850e666ec..2d729aeb5c210b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -29580,9 +29580,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); assert((simdSize <= 16) || (Compiler::SizeMatchesVectorTLength(simdSize))); -#else +#elif defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); @@ -29594,11 +29593,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else -#endif // TARGET_XARCH { assert(!isScalar || varTypeIsFloating(simdBaseType)); } -#endif +#endif // TARGET_ARM64 || TARGET_XARCH assert(op1 != nullptr); assert(op1->TypeIs(simdType)); @@ -29694,9 +29692,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); assert((simdSize <= 16) || (Compiler::SizeMatchesVectorTLength(simdSize))); -#else +#elif defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); @@ -29708,11 +29705,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else -#endif // TARGET_XARCH { assert(!isScalar || varTypeIsFloating(simdBaseType)); } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_XARCH NamedIntrinsic id = NI_Illegal; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 9cf348c470a914..f69fbfc0412ecd 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1331,7 +1331,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (isa == InstructionSet_Vector) { - if (!comp->IsBaselineSimdIsaSupported()) + if (!isHWIntrinsicEnabled) { return NI_Illegal; } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 3169f1a6f2e8d2..2c32fda2458902 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -203,7 +203,6 @@ int HWIntrinsicInfo::lookupIval(NamedIntrinsic id) } return -1; } - case InstructionSet_Vector: //------------------------------------------------------------------------ // getHWIntrinsicImmOps: Gets the immediate Ops for an intrinsic diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index d3e69d647b6d8e..ec976a01e67422 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -2652,7 +2652,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 177); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 179); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; 
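            // This table is generated and strictly positional: each slot must line up with the
            // unmanaged JIT-EE interface entry of the same index, which is why the merge fixes
            // below renumber the trailing entries instead of leaving gaps.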
callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage; @@ -2828,10 +2828,11 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[171] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; callbacks[172] = (delegate* unmanaged)&_recordCallSite; callbacks[173] = (delegate* unmanaged)&_recordRelocation; - callbacks[174] = (delegate* unmanaged)&_getTargetVectorLength; - callbacks[175] = (delegate* unmanaged)&_getJitFlags; - callbacks[176] = (delegate* unmanaged)&_getSpecialCopyHelper; - callbacks[177] = (delegate* unmanaged)&_getSpecialCopyHelper; + callbacks[174] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[175] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[176] = (delegate* unmanaged)&_getTargetVectorLength; + callbacks[177] = (delegate* unmanaged)&_getJitFlags; + callbacks[178] = (delegate* unmanaged)&_getSpecialCopyHelper; return (IntPtr)callbacks; } From 1e972477777902ad52139053757a95d49a9ab47f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 11 Jun 2025 00:20:32 -0700 Subject: [PATCH 090/120] fix build errors after merge --- src/coreclr/jit/gentree.cpp | 49 ++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2d729aeb5c210b..9f89f37a5e495d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20935,7 +20935,11 @@ GenTree* Compiler::gtNewSimdBinOpNode( broadcastOp = &op2; } - if ((broadcastOp != nullptr) && !isScalable) + if ((broadcastOp != nullptr) +#if defined(TARGET_ARM64) + && !isScalable +#endif + ) { #if defined(TARGET_ARM64) if (varTypeIsLong(simdBaseType)) @@ -21032,8 +21036,8 @@ GenTree* Compiler::gtNewSimdBinOpNode( // and produce overall better codegen. 
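        // ARM64_ARG(x) is assumed here to expand to ", x" on arm64 targets and to nothing
        // elsewhere, so the isScalable argument threaded through these helpers simply
        // disappears from non-arm64 builds.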
assert(fgNodeThreading != NodeThreading::LIR); - op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize, isScalable); - return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize, isScalable); + op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } #if defined(TARGET_XARCH) @@ -22243,8 +22247,8 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, assert(!canUseEvexEncodingDebugOnly()); #endif // TARGET_XARCH - GenTree* result = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize, isScalable); - return gtNewSimdUnOpNode(GT_NOT, type, result, simdBaseJitType, simdSize, isScalable); + GenTree* result = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdUnOpNode(GT_NOT, type, result, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } default: @@ -23654,7 +23658,7 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsIntegral(simdBaseType)); - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize, false); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize ARM64_ARG(false)); return gtNewSimdIsZeroNode(type, op1, simdBaseJitType, simdSize ARM64_ARG(false)); } @@ -23702,8 +23706,8 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize); assert(varTypeIsNeonSIMD(type)); - op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseJitType, simdSize, false); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false); + op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseJitType, simdSize ARM64_ARG(false)); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(false)); } assert(varTypeIsIntegral(simdBaseType)); @@ -23773,9 +23777,9 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT op1 = gtNewSimdIsFiniteNode(type, op1, simdBaseJitType, simdSize); op1Dup1 = gtNewSimdTruncNode(type, op1Dup1, simdBaseJitType, simdSize); - GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseJitType, simdSize, false); + GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseJitType, simdSize ARM64_ARG(false)); - return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize, false); + return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); } assert(varTypeIsIntegral(simdBaseType)); @@ -23808,7 +23812,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType if (varTypeIsFloating(simdBaseType)) { GenTree* op1Dup = fgMakeMultiUse(&op1); - return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize, isScalable); + return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } return gtNewZeroConNode(type); } @@ -23849,7 +23853,7 @@ GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfo { return gtNewZeroConNode(type); } - return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, 
isScalable); + return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable)); } //---------------------------------------------------------------------------------------------- @@ -23898,7 +23902,7 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, } cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize); - return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize, false); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize ARM64_ARG(false)); } return gtNewZeroConNode(type); } @@ -23956,12 +23960,12 @@ GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoTy cnsNode1 = gtNewSimdCreateBroadcastNode(type, cnsNode1, simdBaseJitType, simdSize); cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseJitType, simdSize); - op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize, false); - return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize, false); + op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseJitType, simdSize ARM64_ARG(false)); + return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseJitType, simdSize ARM64_ARG(false)); } assert(varTypeIsIntegral(simdBaseType)); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(false)); } //---------------------------------------------------------------------------------------------- @@ -23991,8 +23995,8 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsIntegral(simdBaseType)); - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize, false); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize, false); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize ARM64_ARG(false)); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(false)); } //---------------------------------------------------------------------------------------------- @@ -29392,12 +29396,13 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, var_types simdBaseType, NamedIntrinsic id) { NamedIntrinsic sveId = id; + +#ifdef TARGET_ARM64 if ( (id == NI_Illegal) || ((FIRST_NI_Sve <= sveId) && (sveId <= LAST_NI_Sve))) { return sveId; } - -#ifdef TARGET_ARM64 + // TODO-VL: Look for all places where NI_AdvSimd_* is used and add logic for NI_Sve_* at all those places if (Compiler::UseSveForType(simdType)) @@ -30359,6 +30364,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } } +#if defined(TARGET_ARM64) // simdType can be `TYP_SIMD16` for three cases: // - We originally had Vector128, then we should retain AdvSimd // - We originally had VectorT, and UseSve=0, then we should retain AdvSimd @@ -30367,6 +30373,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = GetScalableHWIntrinsicId(simdType, simdBaseType, id); } +#endif return id; } @@ -30710,10 +30717,12 @@ NamedIntrinsic 
GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } } +#if defined(TARGET_ARM64) if (Compiler::UseSveForType(simdType) && isScalable) { id = GetScalableHWIntrinsicId(simdType, simdBaseType, id); } +#endif return id; } From 355856db48bd6578df1e96af36b4d1bd918bb5b9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 11 Jun 2025 14:01:49 -0700 Subject: [PATCH 091/120] fix linux build error --- src/coreclr/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 747db7ce2b3d39..b3675f4618e8aa 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1424,7 +1424,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) con } else if (isVectorRegister(reg)) { - if ((size == EA_16BYTE)) + if (size == EA_16BYTE) { rn = qRegNames[reg - REG_V0]; } From dd0d48322af30c3efc175bea262a914d384ef476 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 11 Jun 2025 14:02:03 -0700 Subject: [PATCH 092/120] fix the Xor for float/double --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index fe1286afdb72be..5e60c9c104e3e9 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -474,8 +474,8 @@ HARDWARE_INTRINSIC(Sve, StoreAndZipx4, // Predicate variants of intrinsics, these are specialized for operating on TYP_MASK type values. HARDWARE_INTRINSIC(Sve, And_Predicates, -1, 2, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, 
INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)

From 182ba12b5d16c0bef89489b9cb21783cd46c557c Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 11 Jun 2025 17:48:32 -0700
Subject: [PATCH 093/120] fix the typo for equality operator

---
 src/coreclr/jit/gentree.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 9f89f37a5e495d..df3bd34f412b14 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -22552,7 +22552,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
     if (UseSveForType(simdType) && isScalable)
     {
-        intrinsic = NI_Vector_op_Equality;
+        intrinsic = NI_Vector_op_Inequality;

         GenTree* cmpResult =
             gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv*/ false);
@@ -22572,7 +22572,7 @@
     }
     else
     {
-        intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
+        intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
         op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv*/ false);
         op2 = gtNewZeroConNode(simdType);
     }

From 7841140a55e97f9a5cce1d3894d2a6d9b286bd58 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 11 Jun 2025 19:01:11 -0700
Subject: [PATCH 094/120] another build error fix

---
 src/coreclr/jit/emitarm64.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index b3675f4618e8aa..777e8be800fe33 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1418,7 +1418,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) con
     {
         rn = xRegNames[reg];
     }
-    else if ((size == EA_4BYTE))
+    else if (size == EA_4BYTE)
     {
         rn = wRegNames[reg];
     }

From 10f2530a7f37136ff140835bdc5f21d16e73eb1e Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 12 Jun 2025 05:40:04 +0000
Subject: [PATCH 095/120] Fix the spilling of predicate registers

---
 src/coreclr/jit/regset.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index 9bfa2da2731b05..fcc634248b5ee7 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -350,12 +350,20 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
     var_types tempType = RegSet::tmpNormalizeType(treeType);
     regMaskTP mask;
     bool floatSpill = false;
+    bool maskSpill = false;

     if (isFloatRegType(treeType))
     {
         floatSpill = true;
         mask = genRegMaskFloat(reg ARM_ARG(treeType));
     }
+#if defined(TARGET_ARM64)
+    else if (varTypeUsesMaskReg(treeType))
+    {
+        maskSpill = true;
+        mask = genRegMask(reg);
+    }
+#endif
     else
     {
         mask = genRegMask(reg);
     }
@@ -427,6 +435,10 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
     // Generate the code to spill the register
     var_types storeType = floatSpill ? treeType : tempType;
+#if defined(TARGET_ARM64)
+    storeType = maskSpill ? treeType : storeType;
+#endif
+
     m_rsCompiler->codeGen->spillReg(storeType, temp, reg);

     // Mark the tree node as having been spilled

From 47b106f5d6eb3d7202df35779b23fe568e0953c0 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 12 Jun 2025 05:59:36 +0000
Subject: [PATCH 096/120] Make sure to check if retNode is HWIntrinsic

---
 src/coreclr/jit/hwintrinsicarm64.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 2c32fda2458902..678560fe5f8304 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -3684,8 +3684,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         case NI_Vector_WidenLower:
         case NI_Vector_WidenUpper:
         {
-            intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, retNode->AsHWIntrinsic()->GetHWIntrinsicId());
-            retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic);
+            if (retNode->OperIsHWIntrinsic())
+            {
+                intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, retNode->AsHWIntrinsic()->GetHWIntrinsicId());
+                retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic);
+            }
             break;
         }
         case NI_Vector_Add:

From b4ca14ab32a6f25f5d675a1831cfa2f381b89c3d Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 12 Jun 2025 12:56:03 -0700
Subject: [PATCH 097/120] Add missing break

---
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index b406c4362337ed..1c1b8a2d7492f6 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -2794,7 +2794,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op1Reg, imm, opt);
                 }
                 default:
+                {
                     unreached();
+                    break;
+                }
             }
         }

From 98f0e25e5bbf92d186c25f2f2947ff546e54c769 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 13 Jun 2025 15:56:30 -0700
Subject: [PATCH 098/120] fix a typo for mapping zeroextend intrinsic

---
 src/coreclr/jit/gentree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index df3bd34f412b14..22bbda57991592 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -29534,7 +29534,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType,
             sveId = NI_Sve_Subtract;
             break;
         case NI_AdvSimd_ZeroExtendWideningLower:
-            sveId = NI_Sve_ZeroExtendWideningUpper;
+            sveId = NI_Sve_ZeroExtendWideningLower;
             break;
         case NI_AdvSimd_ZeroExtendWideningUpper:
             sveId = NI_Sve_ZeroExtendWideningUpper;
             break;

From 53f4c81889ba27016042b99a3d97c296942377b2 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Fri, 13 Jun 2025 15:56:53 -0700
Subject: [PATCH 099/120] handle Vector.Equals() and similar APIs that return
 Vector instead of bool

---
 src/coreclr/jit/gentree.cpp | 47 +++++++++++++++-----
 src/coreclr/jit/hwintrinsicarm64.cpp | 10 +++---
 2 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 22bbda57991592..c1e9a81b85294f 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -21980,18 +21980,35 @@ GenTree* 
Compiler::gtNewSimdCmpOpNode(genTreeOps op, if (intrinsic != NI_Illegal) { #if defined(FEATURE_MASKED_HW_INTRINSICS) - - bool wrapCallInConvertMaskToVector = (lookupType != type); #if defined(TARGET_ARM64) - wrapCallInConvertMaskToVector &= isScalable; - wrapCallInConvertMaskToVector &= wrapInCmtv; -#endif - if (wrapCallInConvertMaskToVector) + if (isScalable) + { + if (wrapInCmtv) + { + // cndsel(result, 0xFF, 0) + assert(varTypeIsMask(lookupType)); + GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); + GenTree* allOnes = gtNewAllBitsSetConNode(type); + GenTree* allZeros = gtNewZeroConNode(Compiler::getSIMDTypeForSize(simdSize)); + return gtNewSimdHWIntrinsicNode(type, retNode, allOnes, allZeros, NI_Sve_ConditionalSelect, + simdBaseJitType, simdSize); + } + else + { + // will be wrapped by GetActiveElementCount + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + } + else +#endif // TARGET_ARM64 + { + if (lookupType != type) { assert(varTypeIsMask(lookupType)); GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); return gtNewSimdCvtMaskToVectorNode(type, retNode, simdBaseJitType, simdSize); } + } #else assert(lookupType == type); #endif // !FEATURE_MASKED_HW_INTRINSICS @@ -22352,7 +22369,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv*/ false); + gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */ false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL` @@ -22395,7 +22412,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize /* isScalable */ARM64_ARG(true) /* wrapInCmtv */ARM64_ARG(false)); + gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize /* isScalable */ARM64_ARG(true) /* wrapInCmtv */ ARM64_ARG(false)); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL` @@ -22421,7 +22438,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( else { intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality; - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, isScalable); + op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv */ false); op2 = gtNewAllBitsSetConNode(simdType); } @@ -22555,7 +22572,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( intrinsic = NI_Vector_op_Inequality; GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv*/ false); + gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */ false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. 
For `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME` @@ -22573,7 +22590,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( else { intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv*/ false); + op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv */false); op2 = gtNewZeroConNode(simdType); } @@ -22597,7 +22614,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( intrinsic = NI_Vector_op_Inequality; GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv*/ false); + gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME` @@ -23812,7 +23829,7 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType if (varTypeIsFloating(simdBaseType)) { GenTree* op1Dup = fgMakeMultiUse(&op1); - return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } return gtNewZeroConNode(type); } @@ -24035,7 +24052,7 @@ GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfo { return gtNewAllBitsSetConNode(type); } - return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } //---------------------------------------------------------------------------------------------- @@ -24085,7 +24102,7 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, } cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize); - return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } return gtNewZeroConNode(type); } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 678560fe5f8304..9f6a1066eb7e6c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1403,7 +1403,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Equals); + retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Equals, true); break; } @@ -1771,7 +1771,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThan); + retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThan, true); break; } @@ -1820,7 +1820,7 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqual); + retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqual, true); //if (intrinsic == NI_Vector_GreaterThanOrEqual) //{ // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); @@ -2012,7 +2012,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThan); + retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThan, true); //if (intrinsic == NI_Vector_LessThan) //{ // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); @@ -2066,7 +2066,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqual); + retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqual, true); break; } From 7c26e21e6999dc08fd07cc452f6c0732301e7608 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 Jun 2025 17:22:51 -0700 Subject: [PATCH 100/120] fix merge conflict --- .../tools/Common/JitInterface/CorInfoImpl_generated.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index 04a252857bc446..1ff68fe4375cc5 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -2666,7 +2666,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 179); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 180); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_notifyMethodInfoUsage; @@ -2844,10 +2844,10 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[173] = (delegate* unmanaged)&_recordCallSite; callbacks[174] = (delegate* unmanaged)&_recordRelocation; callbacks[175] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[176] = (delegate* unmanaged)&_getTargetVectorLength; - callbacks[177] = (delegate* unmanaged)&_getJitFlags; - callbacks[177] = (delegate* unmanaged)&_getJitFlags; - callbacks[178] = (delegate* unmanaged)&_getSpecialCopyHelper; + callbacks[176] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[177] = (delegate* unmanaged)&_getTargetVectorLength; + callbacks[178] = (delegate* unmanaged)&_getJitFlags; + callbacks[179] = (delegate* unmanaged)&_getSpecialCopyHelper; return (IntPtr)callbacks; } From ee9a4ebea51407b0033e0ea445fbf6d35d3bba15 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 Jun 2025 17:37:14 -0700 Subject: [PATCH 101/120] fix the bad merge --- src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-)

diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
index 1ad6c59443c03d..2df71f447e6f86 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
@@ -1226,7 +1226,7 @@ enum mcPackets
     Packet_NotifyInstructionSetUsage = 229,
     Packet_GetAsyncInfo = 230,
     Packet_GetAsyncResumptionStub = 231,
-    Packet_GetTargetVectorLength = 232,
+    Packet_GetCookieForInterpreterCalliSig = 232,
     Packet_GetHelperFtn = 233,
     Packet_GetTargetVectorLength = 234,
 };

From dbbd311c9fad7ddd3cf3ebe4754b254020fb6ec4 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Sun, 15 Jun 2025 08:32:17 -0700
Subject: [PATCH 102/120] add missing break

---
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index 1c1b8a2d7492f6..e44a8261549b47 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -2792,6 +2792,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     assert(intrin.op2->IsCnsFltOrDbl());
                     unsigned imm = intrin.op2->AsDblCon()->DconValue() == 0.5 ? 0 : 1;
                     GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op1Reg, imm, opt);
+                    break;
                 }
                 default:
                 {

From 635148c1bda93b1f7e94445b8bf242010dbb2144 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Sun, 15 Jun 2025 08:33:09 -0700
Subject: [PATCH 103/120] jit format

---
 src/coreclr/jit/codegenarm64.cpp | 8 +-
 src/coreclr/jit/compiler.cpp | 90 +++--
 src/coreclr/jit/compiler.h | 3 +-
 src/coreclr/jit/emitarm64sve.cpp | 4 +-
 src/coreclr/jit/gentree.cpp | 322 +++++++++------
 src/coreclr/jit/gentree.h | 55 +--
 src/coreclr/jit/hwintrinsicarm64.cpp | 427 +++++++++++---------
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 4 +-
 src/coreclr/jit/importervectorization.cpp | 6 +-
 src/coreclr/jit/instr.cpp | 3 +-
 src/coreclr/jit/lclmorph.cpp | 5 +-
 src/coreclr/jit/morph.cpp | 7 +-
 src/coreclr/jit/regset.cpp | 4 +-
 13 files changed, 543 insertions(+), 395 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index a0de1947615f2f..e8492d523ee66c 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2301,12 +2301,12 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
INS_OPTS_8B : INS_OPTS_16B); } @@ -3043,8 +3043,8 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } } } - emitAttr attr = emitActualTypeSize(targetType); - bool isScalable = (attr == EA_SCALABLE) || (Compiler::UseSveForType(targetType)); + emitAttr attr = emitActualTypeSize(targetType); + bool isScalable = (attr == EA_SCALABLE) || (Compiler::UseSveForType(targetType)); if (isScalable) { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3dfbd3d8b3087c..3dbb33071ed4dc 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2545,38 +2545,40 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #if defined(TARGET_ARM64) -/* -* #ifdef DEBUG -* if (matched) -* { -* compVectorTLength = getTargetLength(); -* compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); -* compUseSveForVectorT |= JitConfig.UseSveForVectorT(); -* } -* else -* { -* compVectorTLength = 16; -* compUseSveForVectorT = JitConfig.UseSveForVectorT(); -* } -* #else -* if (matched) -* { -* compVectorTLength = getTargetLength(); -* compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); -* } -* else -* { -* compVectorTLength = 0; -* compUseSveForVectorT = false; -* } -* #endif -* -*/ + /* + * #ifdef DEBUG + * if (matched) + * { + * compVectorTLength = getTargetLength(); + * compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & + * (compVectorTLength - 1)) == 0); compUseSveForVectorT |= JitConfig.UseSveForVectorT(); + * } + * else + * { + * compVectorTLength = 16; + * compUseSveForVectorT = JitConfig.UseSveForVectorT(); + * } + * #else + * if (matched) + * { + * compVectorTLength = getTargetLength(); + * compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & + * (compVectorTLength - 1)) == 0); + * } + * else + * { + * compVectorTLength = 0; + * compUseSveForVectorT = false; + * } + * #endif + * + */ if (info.compMatchedVM) { - compVectorTLength = info.compCompHnd->getTargetVectorLength(); - compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & (compVectorTLength - 1)) == 0); + compVectorTLength = info.compCompHnd->getTargetVectorLength(); + compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && + ((compVectorTLength & (compVectorTLength - 1)) == 0); #ifdef DEBUG compUseSveForVectorT |= (bool)JitConfig.UseSveForVectorT(); #endif // DEBUG @@ -2588,26 +2590,26 @@ void Compiler::compInitOptions(JitFlags* jitFlags) compUseSveForVectorT = JitConfig.UseSveForVectorT(); // In test mode, if UseSveForVectorT=1, then mimic that // we are generating for VL > 16B - compVectorTLength = 16; //32; + compVectorTLength = 16; // 32; #else compVectorTLength = 0; #endif // DEBUG } -//#ifdef DEBUG -// compUseSveForVectorT = JitConfig.UseSveForVectorT(); -// if (compUseSveForVectorT) -// { -// // In test mode, if UseSveForVectorT=1, then mimic that -// // we are generating for VL > 16B -// compVectorTLength = 16; //32; -// } -// else -//#endif // DEBUG -// { -// compVectorTLength = info.compCompHnd->getTargetVectorLength(); -// compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); -// } +// #ifdef DEBUG +// compUseSveForVectorT = JitConfig.UseSveForVectorT(); +// if (compUseSveForVectorT) +// { +// // In test mode, if UseSveForVectorT=1, then mimic 
that +// // we are generating for VL > 16B +// compVectorTLength = 16; //32; +// } +// else +// #endif // DEBUG +// { +// compVectorTLength = info.compCompHnd->getTargetVectorLength(); +// compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256); +// } #endif // TARGET_ARM64 bool enableInliningMethodsWithEH = JitConfig.JitInlineMethodsWithEH() > 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 699aaaf4d0db9e..f7904676514856 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8981,7 +8981,8 @@ class Compiler } FORCEINLINE static bool UseSveForType(var_types type) { - return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && (type != TYP_SIMD8); // ((type == TYP_SIMD32) || (type == TYP_SIMD64)); + return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && + (type != TYP_SIMD8); // ((type == TYP_SIMD32) || (type == TYP_SIMD64)); } FORCEINLINE static bool UseStrictSveForType(var_types type) { diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 04d64fde5fc4db..d5abe98f50e335 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2696,7 +2696,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, // For larger imm values (> 9 bits), calculate base + imm in a reserved register first. codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm); reg2 = rsvdReg; - imm = 0; + imm = 0; } } if (isVectorRegister(reg1)) @@ -2735,7 +2735,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, // For larger imm values (> 9 bits), calculate base + imm in a reserved register first. codeGen->instGen_Set_Reg_To_Base_Plus_Imm(EA_PTRSIZE, rsvdReg, reg2, imm); reg2 = rsvdReg; - imm = 0; + imm = 0; } } if (isVectorRegister(reg1)) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e5118aaa9d850d..f7c63d5234116f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20840,7 +20840,7 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si intrinsic = (simdSize == 8) ? 
NI_AdvSimd_Arm64_AbsScalar : NI_AdvSimd_Arm64_Abs; } - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -20849,8 +20849,12 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si #endif } -GenTree* Compiler::gtNewSimdBinOpNode( - genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, + var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -20978,7 +20982,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( if ((broadcastOp != nullptr) #if defined(TARGET_ARM64) - && !isScalable + && !isScalable #endif ) { @@ -21048,8 +21052,8 @@ GenTree* Compiler::gtNewSimdBinOpNode( assert(op2ForLookup != op1); } - NamedIntrinsic intrinsic = - GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, op, op1, op2ForLookup, simdBaseType, simdSize, false ARM64_ARG(isScalable)); + NamedIntrinsic intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, op, op1, op2ForLookup, simdBaseType, + simdSize, false ARM64_ARG(isScalable)); if (intrinsic != NI_Illegal) { @@ -21450,11 +21454,13 @@ GenTree* Compiler::gtNewSimdBinOpNode( if ((op2Cns == 0.5) || (op2Cns == 2.0)) { - //GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_MultiplyByScalar, simdBaseJitType, simdSize); + // GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_MultiplyByScalar, simdBaseJitType, + simdSize); } } - op2 = gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + op2 = + gtNewSimdHWIntrinsicNode(type, op2, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); } return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_Sve_Multiply, simdBaseJitType, simdSize); } @@ -21464,12 +21470,12 @@ GenTree* Compiler::gtNewSimdBinOpNode( GenTree** op2ToDup = nullptr; - op1 = gtNewSimdToScalarNode(TYP_LONG, op1, simdBaseJitType, simdSize); + op1 = gtNewSimdToScalarNode(TYP_LONG, op1, simdBaseJitType, simdSize); GenTree** op1ToDup = &op1->AsHWIntrinsic()->Op(1); if (varTypeIsSIMD(op2)) { - op2 = gtNewSimdToScalarNode(TYP_LONG, op2, simdBaseJitType, simdSize); + op2 = gtNewSimdToScalarNode(TYP_LONG, op2, simdBaseJitType, simdSize); op2ToDup = &op2->AsHWIntrinsic()->Op(1); } @@ -21493,11 +21499,13 @@ GenTree* Compiler::gtNewSimdBinOpNode( GenTree* op2Dup = fgMakeMultiUse(op2ToDup); assert(!varTypeIsArithmetic(op1Dup)); - op1Dup = gtNewSimdGetElementNode(TYP_LONG, op1Dup, gtNewIconNode(1), simdBaseJitType, simdSize ARM64_ARG(isScalable)); + op1Dup = gtNewSimdGetElementNode(TYP_LONG, op1Dup, gtNewIconNode(1), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); if (!varTypeIsArithmetic(op2Dup)) { - op2Dup = gtNewSimdGetElementNode(TYP_LONG, op2Dup, gtNewIconNode(1), simdBaseJitType, simdSize ARM64_ARG(isScalable)); + op2Dup = gtNewSimdGetElementNode(TYP_LONG, op2Dup, gtNewIconNode(1), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } // upper = op1.GetElement(1) * op2.GetElement(1) @@ -21560,7 +21568,7 @@ GenTree* 
Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -21955,7 +21963,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, hwIntrinsicID); + // hwIntrinsicID = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, hwIntrinsicID); assert(hwIntrinsicID != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize); } @@ -22013,43 +22021,45 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - var_types lookupType = GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op, type, simdBaseType, simdSize ARM64_ARG(isScalable)); + var_types lookupType = + GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op, type, simdBaseType, simdSize ARM64_ARG(isScalable)); NamedIntrinsic intrinsic = - GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, op, lookupType, op1, op2, simdBaseType, simdSize, false ARM64_ARG(isScalable)); + GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, op, lookupType, op1, op2, simdBaseType, simdSize, + false ARM64_ARG(isScalable)); if (intrinsic != NI_Illegal) { #if defined(FEATURE_MASKED_HW_INTRINSICS) #if defined(TARGET_ARM64) - if (isScalable) - { - if (wrapInCmtv) + if (isScalable) { - // cndsel(result, 0xFF, 0) - assert(varTypeIsMask(lookupType)); - GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); - GenTree* allOnes = gtNewAllBitsSetConNode(type); - GenTree* allZeros = gtNewZeroConNode(Compiler::getSIMDTypeForSize(simdSize)); - return gtNewSimdHWIntrinsicNode(type, retNode, allOnes, allZeros, NI_Sve_ConditionalSelect, - simdBaseJitType, simdSize); + if (wrapInCmtv) + { + // cndsel(result, 0xFF, 0) + assert(varTypeIsMask(lookupType)); + GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); + GenTree* allOnes = gtNewAllBitsSetConNode(type); + GenTree* allZeros = gtNewZeroConNode(Compiler::getSIMDTypeForSize(simdSize)); + return gtNewSimdHWIntrinsicNode(type, retNode, allOnes, allZeros, NI_Sve_ConditionalSelect, + simdBaseJitType, simdSize); + } + else + { + // will be wrapped by GetActiveElementCount + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); + } } else - { - // will be wrapped by GetActiveElementCount - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); - } - } - else #endif // TARGET_ARM64 - { - if (lookupType != type) { - assert(varTypeIsMask(lookupType)); - GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); - return gtNewSimdCvtMaskToVectorNode(type, retNode, simdBaseJitType, simdSize); + if (lookupType != type) + { + assert(varTypeIsMask(lookupType)); + GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); + return gtNewSimdCvtMaskToVectorNode(type, retNode, simdBaseJitType, simdSize); + } } - } #else assert(lookupType == type); #endif 
// !FEATURE_MASKED_HW_INTRINSICS @@ -22305,7 +22315,8 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, assert(!canUseEvexEncodingDebugOnly()); #endif // TARGET_XARCH - GenTree* result = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + GenTree* result = + gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); return gtNewSimdUnOpNode(GT_NOT, type, result, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } @@ -22316,8 +22327,12 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, } } -GenTree* Compiler::gtNewSimdCmpOpAllNode( - genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, + var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(type == TYP_INT); @@ -22408,9 +22423,9 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( { assert(UseSveForType(simdType) && isScalable); - intrinsic = NI_Vector_op_Equality; - GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */ false); + intrinsic = NI_Vector_op_Equality; + GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, + /* isScalable */ true, /* wrapInCmtv */ false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL` @@ -22429,9 +22444,9 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( // However, NOT() operation only operates on "byte" variant i.e. `p1.B`, while the result of `p1` from // `SVE_CMP_CC` can be of other variants like `p1.S` or `p1.D`, etc. GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, - simdBaseJitType, simdSize); - op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, + simdBaseJitType, simdSize); + op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); } else { @@ -22453,7 +22468,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( intrinsic = NI_Vector_op_Equality; GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize /* isScalable */ARM64_ARG(true) /* wrapInCmtv */ ARM64_ARG(false)); + gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, + simdSize /* isScalable */ ARM64_ARG(true) /* wrapInCmtv */ ARM64_ARG(false)); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `All` operation, we can perform `r1 = CNTP(p1)` and then if `r1 == VL`, it means `ALL` @@ -22479,7 +22495,8 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( else { intrinsic = (simdSize == 8) ? 
NI_Vector64_op_Equality : NI_Vector128_op_Equality; - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv */ false); + op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ false, + /* wrapInCmtv */ false); op2 = gtNewAllBitsSetConNode(simdType); } @@ -22505,14 +22522,18 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } } - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdCmpOpAnyNode( - genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, + var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(type == TYP_INT); @@ -22612,8 +22633,8 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( { intrinsic = NI_Vector_op_Inequality; - GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */ false); + GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, + /* isScalable */ true, /* wrapInCmtv */ false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. For `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME` @@ -22623,15 +22644,16 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( // if r1 != 0 return true else false GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, - simdBaseJitType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, + simdBaseJitType, simdSize); op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize); } else { intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality; - op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */false, /* wrapInCmtv */false); + op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */ false, + /* wrapInCmtv */ false); op2 = gtNewZeroConNode(simdType); } @@ -22654,8 +22676,8 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( { intrinsic = NI_Vector_op_Inequality; - GenTree* cmpResult = - gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, /* isScalable */true, /* wrapInCmtv */false); + GenTree* cmpResult = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize, + /* isScalable */ true, /* wrapInCmtv */ false); // The operation `p1 = SVE_CMP_CC(a, b)` returns predicate mask, having `1` for lanes for which `a CC b` // is true. 
For `Any` operation, we can perform `r1 = CNTP(p1)` and then if `r1 != 0`, it means `SOME` @@ -22686,14 +22708,18 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } } - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdCndSelNode( - var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdCndSelNode(var_types type, + GenTree* op1, + GenTree* op2, + GenTree* op3, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23388,7 +23414,7 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -23443,14 +23469,17 @@ GenTree* Compiler::gtNewSimdFmaNode( #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdGetElementNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdGetElementNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { NamedIntrinsic intrinsicId = NI_Vector128_GetElement; var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); @@ -23532,8 +23561,8 @@ GenTree* Compiler::gtNewSimdGetElementNode( if (isScalable) { var_types op1Type = op1->TypeGet(); - op1 = - gtNewSimdHWIntrinsicNode(op1Type, op1, op2, NI_Sve_DuplicateSelectedScalarToVector, simdBaseJitType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(op1Type, op1, op2, NI_Sve_DuplicateSelectedScalarToVector, simdBaseJitType, + simdSize); return gtNewSimdToScalarNode(type, op1, simdBaseJitType, 16); } #endif @@ -23716,7 +23745,8 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsIntegral(simdBaseType)); - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize ARM64_ARG(false)); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, + simdSize ARM64_ARG(false)); return gtNewSimdIsZeroNode(type, op1, simdBaseJitType, simdSize ARM64_ARG(false)); } @@ -23856,7 +23886,10 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT // Returns: // The created IsNaN node // -GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, + GenTree* op1, + CorInfoType 
simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23870,7 +23903,8 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType if (varTypeIsFloating(simdBaseType)) { GenTree* op1Dup = fgMakeMultiUse(&op1); - return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); + return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseJitType, + simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } return gtNewZeroConNode(type); } @@ -23887,7 +23921,10 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType // Returns: // The created IsNegative node // -GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23911,7 +23948,8 @@ GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfo { return gtNewZeroConNode(type); } - return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } //---------------------------------------------------------------------------------------------- @@ -24053,7 +24091,8 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsIntegral(simdBaseType)); - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, simdSize ARM64_ARG(false)); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseJitType, + simdSize ARM64_ARG(false)); return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(false)); } @@ -24069,7 +24108,10 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, // Returns: // The created IsPositive node // -GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24093,7 +24135,8 @@ GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfo { return gtNewAllBitsSetConNode(type); } - return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); + return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseJitType, + simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } //---------------------------------------------------------------------------------------------- @@ -24108,11 +24151,10 @@ GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfo // Returns: // The created IsPositiveInfinity node // -GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, - GenTree* op1, - CorInfoType simdBaseJitType, - unsigned simdSize - ARM64_ARG(bool isScalable)) +GenTree* 
Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24143,7 +24185,8 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, } cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseJitType, simdSize); - return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseJitType, + simdSize ARM64_ARG(isScalable) ARM64_ARG(true)); } return gtNewZeroConNode(type); } @@ -24220,7 +24263,10 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, // Returns: // The created IsZero node // -GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24231,7 +24277,8 @@ GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } //---------------------------------------------------------------------------------------------- @@ -24406,8 +24453,11 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, #endif // !TARGET_XARCH && !TARGET_ARM64 } -GenTree* Compiler::gtNewSimdMaxNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdMaxNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24487,8 +24537,11 @@ GenTree* Compiler::gtNewSimdMaxNode( return gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } -GenTree* Compiler::gtNewSimdMaxNativeNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdMaxNativeNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24666,7 +24719,7 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( if (intrinsic != NI_Illegal) { - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -24681,8 +24734,11 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } -GenTree* Compiler::gtNewSimdMinNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* 
Compiler::gtNewSimdMinNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24762,8 +24818,11 @@ GenTree* Compiler::gtNewSimdMinNode( return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); } -GenTree* Compiler::gtNewSimdMinNativeNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24937,7 +24996,7 @@ GenTree* Compiler::gtNewSimdMinNativeNode( if (intrinsic != NI_Illegal) { - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } @@ -25509,7 +25568,7 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); @@ -27301,14 +27360,17 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdUnOpNode( - genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op, + var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize ARM64_ARG(bool isScalable)) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27601,7 +27663,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo intrinsic = NI_AdvSimd_ZeroExtendWideningLower; } - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsic, simdBaseJitType, 8); @@ -27813,7 +27875,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo intrinsic = NI_AdvSimd_ZeroExtendWideningUpper; } - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); assert(intrinsic != NI_Illegal); return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } @@ -29450,13 +29512,15 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty //------------------------------------------------------------------------------ // GetScalableHWIntrinsicId: Returns SVE equivalent of given 
intrinsic ID, if applicable // -//NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) -NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, var_types simdBaseType, NamedIntrinsic id) +// NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id) +NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, + var_types simdBaseType, + NamedIntrinsic id) { NamedIntrinsic sveId = id; #ifdef TARGET_ARM64 - if ( (id == NI_Illegal) || ((FIRST_NI_Sve <= sveId) && (sveId <= LAST_NI_Sve))) + if ((id == NI_Illegal) || ((FIRST_NI_Sve <= sveId) && (sveId <= LAST_NI_Sve))) { return sveId; } @@ -29612,7 +29676,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, } } // Make sure if we are using VL SIMD, we are not generating AdvSimd/NEON intrinsics - assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD16) || (sveId < FIRST_NI_AdvSimd) || (sveId > LAST_NI_AdvSimd)); + assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD16) || (sveId < FIRST_NI_AdvSimd) || + (sveId > LAST_NI_AdvSimd)); #endif // TARGET_ARM64 return sveId; @@ -29632,8 +29697,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdType, // Returns: // The intrinsic ID based on the oper, base type, and simd size // -NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( - Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar ARM64_ARG(bool isScalable)) +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(Compiler* comp, + genTreeOps oper, + GenTree* op1, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); @@ -29734,14 +29803,13 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( // Returns: // The intrinsic ID based on the oper, base type, and simd size // -NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, - genTreeOps oper, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar - ARM64_ARG(bool isScalable)) +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, + genTreeOps oper, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); @@ -30452,15 +30520,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, // Returns: // The intrinsic ID based on the oper, base type, and simd size // -NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar - ARM64_ARG(bool isScalable)) +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); @@ -30802,8 +30869,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, // type doesn't match with the type IR wants us to be producing. For example, the consuming node // may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK. 
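// A minimal sketch of how these two lookups compose (simplified: ARM64_ARG elided,
// arguments passed positionally; helper names as declared in this file). The lookup
// type is resolved first, then drives both the intrinsic choice and any conversion
// the caller must emit:
//
//   var_types lookupType =
//       GetLookupTypeForCmpOp(comp, oper, type, simdBaseType, simdSize, isScalable);
//   NamedIntrinsic id =
//       GetHWIntrinsicIdForCmpOp(comp, oper, lookupType, op1, op2, simdBaseType,
//                                simdSize, /* isScalar */ false, isScalable);
//   // When lookupType is a mask but the IR consumer expects a vector, the caller
//   // converts back, e.g. via gtNewSimdCvtMaskToVectorNode(type, node,
//   // simdBaseJitType, simdSize), as gtNewSimdCmpOpNode does above.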
// -var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp( - Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize ARM64_ARG(bool isScalable)) +var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + var_types simdBaseType, + unsigned simdSize ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 3d620d1fdc4598..082e3c71df91bc 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6576,33 +6576,38 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); - //static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id); + // static NamedIntrinsic GetScalableHWIntrinsicId(unsigned simdSize, NamedIntrinsic id); static NamedIntrinsic GetScalableHWIntrinsicId(var_types simdType, var_types simdBaseType, NamedIntrinsic id); - static NamedIntrinsic GetHWIntrinsicIdForUnOp( - Compiler* comp, genTreeOps oper, GenTree* op1, var_types simdBaseType, unsigned simdSize, bool isScalar ARM64_ARG(bool isScalable)); - - static NamedIntrinsic GetHWIntrinsicIdForBinOp(Compiler* comp, - genTreeOps oper, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar - ARM64_ARG(bool isScalable)); - - static NamedIntrinsic GetHWIntrinsicIdForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar - ARM64_ARG(bool isScalable)); - - static var_types GetLookupTypeForCmpOp( - Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize ARM64_ARG(bool isScalable)); + static NamedIntrinsic GetHWIntrinsicIdForUnOp(Compiler* comp, + genTreeOps oper, + GenTree* op1, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)); + + static NamedIntrinsic GetHWIntrinsicIdForBinOp(Compiler* comp, + genTreeOps oper, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)); + + static NamedIntrinsic GetHWIntrinsicIdForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable)); + + static var_types GetLookupTypeForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + var_types simdBaseType, + unsigned simdSize ARM64_ARG(bool isScalable)); static genTreeOps GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9f6a1066eb7e6c..fd33102a28c0d1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -654,11 +654,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); op1 = impSIMDPopStack(); retNode = gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_Abs) + // if (intrinsic == NI_Vector_Abs) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // 
retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -720,8 +721,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); GenTree* notNode = gtNewSimdUnOpNode(GT_NOT, retType, op2, simdBaseJitType, simdSize, isScalable); - op2 = gtFoldExpr(notNode); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize, isScalable); + op2 = gtFoldExpr(notNode); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize, isScalable); break; } @@ -922,7 +923,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_BitwiseAnd); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_BitwiseAnd); break; } @@ -935,7 +937,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_BitwiseOr); + retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_BitwiseOr); break; } @@ -953,11 +956,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCeilNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_Ceiling) + // if (intrinsic == NI_Vector_Ceiling) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -971,7 +975,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize, intrinsic == NI_Vector_ConditionalSelect); + retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize, + intrinsic == NI_Vector_ConditionalSelect); break; } @@ -984,7 +989,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, intrinsic = (simdSize == 8) ? 
NI_AdvSimd_Arm64_ConvertToDoubleScalar : NI_AdvSimd_Arm64_ConvertToDouble; - //intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, intrinsic); op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); @@ -1011,11 +1016,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_ConvertToInt32) + // if (intrinsic == NI_Vector_ConvertToInt32) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1039,11 +1045,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_ConvertToInt64) + // if (intrinsic == NI_Vector_ConvertToInt64) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1056,11 +1063,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_ConvertToSingle) + // if (intrinsic == NI_Vector_ConvertToSingle) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1084,11 +1092,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); - //if ((intrinsic == NI_Vector_ConvertToUInt32Native) || (intrinsic == NI_Vector_ConvertToUInt32)) + // if ((intrinsic == NI_Vector_ConvertToUInt32Native) || (intrinsic == NI_Vector_ConvertToUInt32)) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1112,11 +1121,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); - //if ((intrinsic == NI_Vector_ConvertToUInt64Native) || (intrinsic == 
NI_Vector_ConvertToUInt64)) + // if ((intrinsic == NI_Vector_ConvertToUInt64Native) || (intrinsic == NI_Vector_ConvertToUInt64)) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1371,7 +1381,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass); - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Division); + retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_Division); break; } @@ -1403,7 +1414,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Equals, true); + retNode = gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_Equals, true); break; } @@ -1416,12 +1428,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Equality); - //if (intrinsic == NI_Vector_op_Equality) + retNode = gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_Equality); + // if (intrinsic == NI_Vector_op_Equality) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1434,12 +1448,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_EqualsAny); - //if (intrinsic == NI_Vector_EqualsAny) + retNode = gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_EqualsAny); + // if (intrinsic == NI_Vector_EqualsAny) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1624,11 +1640,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdFloorNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_Floor) + // if (intrinsic == NI_Vector_Floor) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // 
retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1650,11 +1667,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_FusedMultiplyAdd) + // if (intrinsic == NI_Vector_FusedMultiplyAdd) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1662,11 +1680,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(Compiler::UseSveForType(retType)); - op2 = impPopStack().val; - op1 = impPopStack().val; + op2 = impPopStack().val; + op1 = impPopStack().val; - //TODO-VL: There is no way to do floating point `initial and `step` in SVE, corresponding - // to the `Vector.CreateSequence(). For now, just treat it as integral. + // TODO-VL: There is no way to do floating point `initial and `step` in SVE, corresponding + // to the `Vector.CreateSequence(). For now, just treat it as integral. if (!varTypeIsIntegral(op1)) { op1 = gtNewCastNode(TYP_LONG, op1, false, TYP_LONG); @@ -1740,7 +1758,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impSIMDPopStack(); - retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GetElement); + retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GetElement); break; } @@ -1771,7 +1790,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThan, true); + retNode = gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThan, true); break; } @@ -1784,12 +1804,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanAll); - //if (intrinsic == NI_Vector_GreaterThanAll) + retNode = gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThanAll); + // if (intrinsic == NI_Vector_GreaterThanAll) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1802,12 +1824,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanAny); - //if 
(intrinsic == NI_Vector_GreaterThanAny) + retNode = gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThanAny); + // if (intrinsic == NI_Vector_GreaterThanAny) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1820,12 +1844,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqual, true); - //if (intrinsic == NI_Vector_GreaterThanOrEqual) + retNode = gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThanOrEqual, true); + // if (intrinsic == NI_Vector_GreaterThanOrEqual) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1838,12 +1864,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqualAll); - //if (intrinsic == NI_Vector_GreaterThanOrEqualAll) + retNode = gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThanOrEqualAll); + // if (intrinsic == NI_Vector_GreaterThanOrEqualAll) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1856,12 +1884,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_GreaterThanOrEqualAny); - //if (intrinsic == NI_Vector_GreaterThanOrEqualAny) + retNode = gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_GreaterThanOrEqualAny); + // if (intrinsic == NI_Vector_GreaterThanOrEqualAny) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -1924,8 +1954,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_IsNegative: { assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsNegativeNode(retType, 
op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsNegative); + op1 = impSIMDPopStack(); + retNode = + gtNewSimdIsNegativeNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsNegative); break; } @@ -1969,8 +2000,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_IsPositive: { assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsPositive); + op1 = impSIMDPopStack(); + retNode = + gtNewSimdIsPositiveNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsPositive); break; } @@ -1980,7 +2012,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_IsPositiveInfinity); + retNode = gtNewSimdIsPositiveInfinityNode(retType, op1, simdBaseJitType, simdSize, + intrinsic == NI_Vector_IsPositiveInfinity); break; } @@ -2012,12 +2045,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThan, true); - //if (intrinsic == NI_Vector_LessThan) + retNode = gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThan, true); + // if (intrinsic == NI_Vector_LessThan) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2030,12 +2065,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanAll); - //if (intrinsic == NI_Vector_LessThanAll) + retNode = gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThanAll); + // if (intrinsic == NI_Vector_LessThanAll) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2048,12 +2085,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanAny); - //if (intrinsic == NI_Vector_LessThanAny) + retNode = gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThanAny); + // if (intrinsic == NI_Vector_LessThanAny) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // 
retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2066,7 +2105,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqual, true); + retNode = gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThanOrEqual, true); break; } @@ -2079,12 +2119,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqualAll); - //if (intrinsic == NI_Vector_LessThanOrEqualAll) + retNode = gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThanOrEqualAll); + // if (intrinsic == NI_Vector_LessThanOrEqualAll) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2097,12 +2139,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_LessThanOrEqualAny); - //if (intrinsic == NI_Vector_LessThanOrEqualAny) + retNode = gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_LessThanOrEqualAny); + // if (intrinsic == NI_Vector_LessThanOrEqualAny) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2203,11 +2247,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdMaxNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Max); - //if (intrinsic == NI_Vector_Max) + // if (intrinsic == NI_Vector_Max) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2225,12 +2270,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MaxNative); - //if (intrinsic == NI_Vector_MaxNative) + retNode = + gtNewSimdMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MaxNative); + // if (intrinsic == NI_Vector_MaxNative) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, 
retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2245,11 +2292,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdMinNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_Min); - //if (intrinsic == NI_Vector_Min) + // if (intrinsic == NI_Vector_Min) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2267,12 +2315,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdMinNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MinNative); - //if (intrinsic == NI_Vector_MinNative) + retNode = + gtNewSimdMinNativeNode(retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_MinNative); + // if (intrinsic == NI_Vector_MinNative) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2280,16 +2330,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 2); - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); + op2 = getArgForHWIntrinsic(argType, argClass); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); + op1 = getArgForHWIntrinsic(argType, argClass); retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, true); break; @@ -2342,16 +2392,19 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (varTypeIsFloating(simdBaseType)) { retNode = gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_MultiplyAddEstimate) + // if (intrinsic == NI_Vector_MultiplyAddEstimate) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } } else { - GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, 
simdBaseJitType, simdSize, intrinsic == NI_Vector_MultiplyAddEstimate); - retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize, intrinsic == NI_Vector_MultiplyAddEstimate); + GenTree* mulNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_MultiplyAddEstimate); + retNode = gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize, + intrinsic == NI_Vector_MultiplyAddEstimate); } break; } @@ -2408,7 +2461,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_UnaryNegation); + retNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_UnaryNegation); break; } @@ -2418,12 +2472,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_OnesComplement); - //if (intrinsic == NI_Vector_op_OnesComplement) + retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_OnesComplement); + // if (intrinsic == NI_Vector_op_OnesComplement) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2436,12 +2492,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Inequality); - //if (intrinsic == NI_Vector_op_Inequality) + retNode = gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_Inequality); + // if (intrinsic == NI_Vector_op_Inequality) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2463,7 +2521,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_Subtraction); + retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_Subtraction); break; } @@ -2547,11 +2606,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdRoundNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_Round) + // if (intrinsic == NI_Vector_Round) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = 
GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2978,11 +3038,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdTruncNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_Truncate) + // if (intrinsic == NI_Vector_Truncate) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -2995,11 +3056,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdWidenLowerNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_WidenLower) + // if (intrinsic == NI_Vector_WidenLower) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -3012,11 +3074,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdWidenUpperNode(retType, op1, simdBaseJitType, simdSize); - //if (intrinsic == NI_Vector_WidenUpper) + // if (intrinsic == NI_Vector_WidenUpper) //{ - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); - //} + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, + // retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + // retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + // } break; } @@ -3091,7 +3154,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize, intrinsic == NI_Vector_op_ExclusiveOr); + retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize, + intrinsic == NI_Vector_op_ExclusiveOr); break; } @@ -3655,7 +3719,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, #ifdef TARGET_ARM64 if ((retNode != nullptr) && (intrinsic >= FIRST_NI_Vector) && (intrinsic <= LAST_NI_Vector)) { - // For VectorT, map the intrinsics + // For VectorT, map the intrinsics switch (intrinsic) { case NI_Vector_Abs: @@ -3686,7 +3750,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { if (retNode->OperIsHWIntrinsic()) { - intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + intrinsic = + GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, + retNode->AsHWIntrinsic()->GetHWIntrinsicId()); retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); } break; @@ -3726,9 +3792,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // gtNewSimdCmpOpNode should handle this NamedIntrinsic sveIntrinsic = 
retNode->AsHWIntrinsic()->GetHWIntrinsicId(); assert(((FIRST_NI_Sve <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Sve)) || - ((FIRST_NI_Vector <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Vector)) || - (sveIntrinsic == NI_Sve_ConvertMaskToVector) || - (sveIntrinsic == NI_Sve_ConvertVectorToMask)); + ((FIRST_NI_Vector <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Vector)) || + (sveIntrinsic == NI_Sve_ConvertMaskToVector) || (sveIntrinsic == NI_Sve_ConvertVectorToMask)); break; } case NI_Vector_op_OnesComplement: @@ -3762,8 +3827,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } default: { - //TODO-VL: Enable this - //unreached(); + // TODO-VL: Enable this + // unreached(); break; } } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index e44a8261549b47..83796dc13f2e1d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -818,10 +818,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (targetReg != embMaskOp1Reg) { GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, maskReg, - embMaskOp1Reg, opt); + embMaskOp1Reg, opt); } assert(intrinEmbMask.op2->IsCnsFltOrDbl()); - double imm = intrinEmbMask.op2->AsDblCon()->DconValue(); + double imm = intrinEmbMask.op2->AsDblCon()->DconValue(); assert((imm == 0.5) || (imm == 2.0)); GetEmitter()->emitIns_R_R_F(insEmbMask, emitSize, targetReg, op1Reg, imm, opt); break; diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 8e65f6f7e7a2e2..eeeb04e57d3fff 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -98,13 +98,15 @@ GenTree* Compiler::impExpandHalfConstEquals( #ifdef FEATURE_HW_INTRINSICS if (varTypeIsSIMD(type)) { - return gtNewSimdBinOpNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(type) ARM64_ARG(false)); + return gtNewSimdBinOpNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, + genTypeSize(type) ARM64_ARG(false)); } if (varTypeIsSIMD(op1)) { // E.g. a comparison of SIMD ops returning TYP_INT; assert(varTypeIsSIMD(op2)); - return gtNewSimdCmpOpAllNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, genTypeSize(op1) ARM64_ARG(false)); + return gtNewSimdCmpOpAllNode(oper, type, op1, op2, CORINFO_TYPE_NATIVEUINT, + genTypeSize(op1) ARM64_ARG(false)); } #endif return gtNewOperNode(oper, type, op1, op2); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 67b1478c1648b6..912a2131b065da 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -576,8 +576,7 @@ void CodeGen::inst_Mov(var_types dstType, GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, flags); #elif defined(TARGET_ARM64) bool isScalable = (size == EA_SCALABLE) || (Compiler::UseStrictSveForType(dstType)); - GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, - isScalable ? INS_OPTS_SCALABLE_B : INS_OPTS_NONE); + GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip, isScalable ? 
INS_OPTS_SCALABLE_B : INS_OPTS_NONE); #else GetEmitter()->emitIns_Mov(ins, size, dstReg, srcReg, canSkip); #endif diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 4f55b4119b3bbd..11e320c4bc822a 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1698,8 +1698,9 @@ class LocalAddressVisitor final : public GenTreeVisitor { // Handle case 1 or the float field of case 2 GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType)); - hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, - CORINFO_TYPE_FLOAT, genTypeSize(varDsc) ARM64_ARG(false)); + hwiNode = + m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT, + genTypeSize(varDsc) ARM64_ARG(false)); break; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b354d48641c613..12f96163af5c66 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11017,7 +11017,9 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree) var_types simdBaseType = tree->GetSimdBaseType(); unsigned simdSize = tree->GetSimdSize(); #if defined(TARGET_ARM64) - bool isScalable = (((FIRST_NI_Vector <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Vector)) || ((FIRST_NI_Sve <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Sve))); + bool isScalable = + (((FIRST_NI_Vector <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Vector)) || + ((FIRST_NI_Sve <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Sve))); #endif if (tree->isCommutativeHWIntrinsic()) @@ -11049,7 +11051,8 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree) // Move constant vectors from op1 to op2 for comparison operations genTreeOps newOper = GenTree::SwapRelop(oper); var_types lookupType = - GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, simdSize ARM64_ARG(isScalable)); + GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, + simdSize ARM64_ARG(isScalable)); NamedIntrinsic newId = GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, newOper, retType, op2, op1, simdBaseType, simdSize, false ARM64_ARG(isScalable)); diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index fcc634248b5ee7..4b40cc0a1c1f15 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -350,7 +350,7 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */) var_types tempType = RegSet::tmpNormalizeType(treeType); regMaskTP mask; bool floatSpill = false; - bool maskSpill = false; + bool maskSpill = false; if (isFloatRegType(treeType)) { @@ -361,7 +361,7 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */) if (varTypeUsesMaskReg(treeType)) { maskSpill = true; - mask = genRegMask(reg); + mask = genRegMask(reg); } #endif else From 9e70dd0428dbdd18e0e57b2d4980a97a88be92e7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 16 Jun 2025 13:46:32 -0700 Subject: [PATCH 104/120] Disable Vector's WidenUpper and WidenLower intrinsic --- src/coreclr/jit/hwintrinsicarm64.cpp | 8 ++++---- src/coreclr/jit/hwintrinsiclistarm64sve.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index fd33102a28c0d1..066000ac5d981e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -3047,7 +3047,7 
@@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector_WidenLower: + //case NI_Vector_WidenLower: case NI_Vector64_WidenLower: case NI_Vector128_WidenLower: { @@ -3065,7 +3065,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector_WidenUpper: + //case NI_Vector_WidenUpper: case NI_Vector64_WidenUpper: case NI_Vector128_WidenUpper: { @@ -3745,8 +3745,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_op_Subtraction: case NI_Vector_Sum: case NI_Vector_Truncate: - case NI_Vector_WidenLower: - case NI_Vector_WidenUpper: + //case NI_Vector_WidenLower: + //case NI_Vector_WidenUpper: { if (retNode->OperIsHWIntrinsic()) { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7a45820f6b64fc..fafaa4729930db 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -99,8 +99,9 @@ HARDWARE_INTRINSIC(Vector, Subtract, HARDWARE_INTRINSIC(Vector, Sum, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, ToScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(Vector, Truncate, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +// No good equivalent in SVE to simulate WidenLower i.e. 
ConvertToDouble /WidenUpper i.e ConvertToDoubleUpper +//HARDWARE_INTRINSIC(Vector, WidenLower, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +//HARDWARE_INTRINSIC(Vector, WidenUpper, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector, Xor, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_AllBitsSet, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector, get_Indices, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) From f482e65c6bfc746c659ba36025545feee162b7c5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 16 Jun 2025 13:52:45 -0700 Subject: [PATCH 105/120] Do not generate SVE if not supported --- src/coreclr/jit/importercalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b8e10d605deec8..16646fc1253ea3 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -10839,7 +10839,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) assert((size == 16) || (size == 32) || (size == 64)); bool useSizeAgnosticVector = false; #ifdef TARGET_ARM64 - useSizeAgnosticVector = UseSveForVectorT(); + useSizeAgnosticVector = compExactlyDependsOn(InstructionSet_Sve) && UseSveForVectorT(); #endif const char* lookupClassName = className; From 05b4d06cfbe85f379decdc374e16d7dd3257713a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 16 Jun 2025 16:41:29 -0700 Subject: [PATCH 106/120] Changes from #116726 --- src/coreclr/jit/emit.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 1e73c07e0092e6..39760d5dce0b4d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2873,9 +2873,6 @@ void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMas } else { - // This is not an EXTEND group. 
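// (A hedged note, going by the subject line above: this hunk presumably ports the
// corresponding change from dotnet/runtime#116726. Dropping the comment above and
// the assert below lets emitAddLabel() be reached while the current insertion
// group is an IGF_EXTEND group, instead of asserting.)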
- assert((emitCurIG->igFlags & IGF_EXTEND) == 0); -
 #if defined(DEBUG) || defined(LATE_DISASM)
 emitCurIG->igWeight = getCurrentBlockWeight();
 emitCurIG->igPerfScore = 0.0;

From 6a73a98b90c8e6c39ed12317b437771dcc61cc8d Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 18 Jun 2025 12:42:50 -0700
Subject: [PATCH 107/120] Handle cases for shift amount as Vector

---
 src/coreclr/jit/gentree.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index f7c63d5234116f..790e46fe6af8e2 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20868,14 +20868,20 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

 assert(op2 != nullptr);

+ bool isOp2SimdType = (genActualType(op2) == genActualType(type)) || (genActualType(op2) == genActualType(simdBaseType)) ||
+ (op2->TypeIs(TYP_SIMD12) && (type == TYP_SIMD16));
+
 if ((op == GT_LSH) || (op == GT_RSH) || (op == GT_RSZ))
 {
- assert(genActualType(op2) == TYP_INT);
+ assert((genActualType(op2) == TYP_INT)
+#if defined (TARGET_ARM64)
+ || (isScalable && isOp2SimdType)
+#endif
+ );
 }
 else
 {
- assert((genActualType(op2) == genActualType(type)) || (genActualType(op2) == genActualType(simdBaseType)) ||
- (op2->TypeIs(TYP_SIMD12) && (type == TYP_SIMD16)));
+ assert(isOp2SimdType);
 }

 bool needsReverseOps = false;
@@ -20932,6 +20938,13 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
 }
 #endif // TARGET_ARM64
 }
+#ifdef TARGET_ARM64
+ else if (UseSveForType(type) && isScalable && varTypeIsSIMD(op2->TypeGet()))
+ {
+ // SVE already has a variant that operates on vector operands,
+ // so there is nothing to do here.
+ }
+#endif
 else
 {
 op2 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(shiftCountMask));

From d523ee37267600c47f714bcd2cfe7b09013fa25e Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Wed, 18 Jun 2025 17:51:45 -0700
Subject: [PATCH 108/120] Fix Vector.ConditionalSelect

---
 src/coreclr/jit/compiler.h | 4 +---
 src/coreclr/jit/gentree.cpp | 27 +++++++++--------------
 src/coreclr/jit/hwintrinsicarm64.cpp | 5 ++---
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +-
 4 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index f7904676514856..3fc40ff3e8cf5e 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3208,9 +3208,7 @@ class Compiler
 GenTree* op2,
 GenTree* op3,
 CorInfoType simdBaseJitType,
- unsigned simdSize
- ARM64_ARG(bool isScalable)
- );
+ unsigned simdSize);

 #if defined(FEATURE_MASKED_HW_INTRINSICS)
 GenTree* gtNewSimdCvtMaskToVectorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 790e46fe6af8e2..a639d5cad14d2d 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20873,11 +20873,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,

 if ((op == GT_LSH) || (op == GT_RSH) || (op == GT_RSZ))
 {
- assert((genActualType(op2) == TYP_INT)
+ bool op2Type = genActualType(op2) == TYP_INT;
#if defined (TARGET_ARM64)
- || (isScalable && isOp2SimdType)
+ op2Type |= (isScalable && isOp2SimdType);
#endif
- );
+ assert(op2Type && "op2's type is unexpected.");
 }
 else
 {
@@ -22732,7 +22732,7 @@ GenTree* Compiler::gtNewSimdCndSelNode(var_types type,
 op2,
 GenTree* op3,
 CorInfoType simdBaseJitType,
- unsigned simdSize ARM64_ARG(bool isScalable))
+ unsigned simdSize)
 {
 assert(varTypeIsSIMD(type));
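// A rough sketch of the shape this patch produces (illustrative only; `mask`,
// `vTrue`, `vFalse` and `useScalableVectorT` are placeholder names, not code
// from the diff): ConditionalSelect is now always built as a plain bitwise
// select, and is only remapped to SVE2 afterwards:
//
//   GenTree* sel = gtNewSimdHWIntrinsicNode(type, mask, vTrue, vFalse,
//                                           NI_AdvSimd_BitwiseSelect,
//                                           simdBaseJitType, simdSize);
//   if (useScalableVectorT && sel->OperIsHWIntrinsic())
//   {
//       // GetScalableHWIntrinsicId maps NI_AdvSimd_BitwiseSelect to
//       // NI_Sve2_BitwiseSelect (see the mapping hunk further below).
//       NamedIntrinsic sveId = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(
//           type, simdBaseType, sel->AsHWIntrinsic()->GetHWIntrinsicId());
//       sel->AsHWIntrinsic()->ChangeHWIntrinsicId(sveId);
//   }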
assert(getSIMDTypeForSize(simdSize) == type); @@ -22768,17 +22768,7 @@ GenTree* Compiler::gtNewSimdCndSelNode(var_types type, } return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - if (UseSveForType(type) && isScalable) - { - intrinsic = NI_Sve_ConditionalSelect; - op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); - } - else - { - intrinsic = NI_AdvSimd_BitwiseSelect; - } - - return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, NI_AdvSimd_BitwiseSelect, simdBaseJitType, simdSize); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24744,7 +24734,7 @@ GenTree* Compiler::gtNewSimdMaxNativeNode(var_types type, op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(false)); // result = ConditionalSelect(op1, op1Dup, op2Dup) - return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize); } GenTree* Compiler::gtNewSimdMinNode(var_types type, @@ -25021,7 +25011,7 @@ GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); // result = ConditionalSelect(op1, op1Dup, op2Dup) - return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize); } GenTree* Compiler::gtNewSimdNarrowNode( @@ -29555,6 +29545,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetScalableHWIntrinsicId(var_types simdT case NI_AdvSimd_And: sveId = NI_Sve_And; break; + case NI_AdvSimd_BitwiseSelect: + sveId = NI_Sve2_BitwiseSelect; + break; case NI_AdvSimd_Ceiling: case NI_AdvSimd_Arm64_Ceiling: sveId = NI_Sve_RoundToPositiveInfinity; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 066000ac5d981e..bdcdf66c7a20b7 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -975,8 +975,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize, - intrinsic == NI_Vector_ConditionalSelect); + retNode = gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize); break; } @@ -3724,6 +3723,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { case NI_Vector_Abs: case NI_Vector_Ceiling: + case NI_Vector_ConditionalSelect: case NI_Vector_ConvertToDouble: case NI_Vector_ConvertToInt32Native: case NI_Vector_ConvertToInt32: @@ -3771,7 +3771,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert((FIRST_NI_Sve <= sveIntrinsic) && (sveIntrinsic <= LAST_NI_Sve)); break; } - case NI_Vector_ConditionalSelect: case NI_Vector_Equals: case NI_Vector_op_Equality: case NI_Vector_EqualsAny: diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index fafaa4729930db..90aa4400552160 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -439,7 +439,7 @@ HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningUpper, HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLower, -1, 2, {INS_invalid, INS_invalid, 
INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningUpper, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve2, InterleavingXorEvenOdd, -1, 3, {INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) From 98fa9f37c71ae5431582920aabe79b54e55d2201 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 18 Jun 2025 23:39:59 -0700 Subject: [PATCH 109/120] Fix Multiple Vector * T case --- src/coreclr/jit/gentree.cpp | 16 +++++++++------- src/coreclr/jit/lowerarmarch.cpp | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a639d5cad14d2d..a4072df92dd86d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20993,11 +20993,7 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, broadcastOp = &op2; } - if ((broadcastOp != nullptr) -#if defined(TARGET_ARM64) - && !isScalable -#endif - ) + if (broadcastOp != nullptr) { #if defined(TARGET_ARM64) if (varTypeIsLong(simdBaseType)) @@ -21011,9 +21007,15 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, *broadcastOp = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, *broadcastOp, simdBaseJitType, 8); break; } + else if (isScalable) + { + *broadcastOp = gtNewSimdHWIntrinsicNode(type, *broadcastOp, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + } + else #endif // TARGET_ARM64 - - *broadcastOp = gtNewSimdCreateBroadcastNode(type, *broadcastOp, simdBaseJitType, simdSize); + { + *broadcastOp = gtNewSimdCreateBroadcastNode(type, *broadcastOp, simdBaseJitType, simdSize); + } } break; } diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index a788d45f66244a..7668817d721192 100644 --- 
a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1614,6 +1614,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { #ifdef TARGET_ARM64 + //TODO-VL: Remove this entry because this is not handled properly inside LowerHWIntrinsicCreate case NI_Vector_Create: #endif case NI_Vector64_Create: From 907446172dbf25263fe12b5e40a3f34cc160a597 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 00:01:22 -0700 Subject: [PATCH 110/120] Add entry for VectorMath test in ISA --- src/coreclr/jit/hwintrinsicarm64.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index bdcdf66c7a20b7..42fdf3eedcbe15 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -132,6 +132,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_ILLEGAL; } + else if (strncmp(className, "VectorMath", 10) == 0) + { + return InstructionSet_ILLEGAL; + } else if (strncmp(className, "Vector", 6) == 0) { return InstructionSet_Vector; From bcb7beef163f3b18e7c8fbc185a524b8792b8726 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 12:11:29 -0700 Subject: [PATCH 111/120] Fix CreateSequence for float/double --- src/coreclr/jit/hwintrinsicarm64.cpp | 45 +++++++++++++--------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 42fdf3eedcbe15..e28a1421f1fbbc 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1330,6 +1330,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector_CreateSequence: + { + assert(Compiler::UseSveForType(retType)); + + if ((simdBaseJitType != CORINFO_TYPE_FLOAT) && (simdBaseJitType != CORINFO_TYPE_DOUBLE)) + { + // There is no way to do floating point `initial and `step` in SVE, corresponding + // to the `Vector.CreateSequence(). + op2 = impPopStack().val; + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); + } + break; + } + case NI_Vector64_CreateSequence: case NI_Vector128_CreateSequence: { @@ -1679,27 +1694,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector_CreateSequence: - { - assert(Compiler::UseSveForType(retType)); - - op2 = impPopStack().val; - op1 = impPopStack().val; - - // TODO-VL: There is no way to do floating point `initial and `step` in SVE, corresponding - // to the `Vector.CreateSequence(). For now, just treat it as integral. 
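// A hedged aside on the float/double guard added above (the assembly line is
// illustrative): NI_Sve_Index maps to SVE's INDEX instruction, which only has
// integer element forms, e.g.
//
//   index   z0.s, #0, #1    // z0 = { 0, 1, 2, 3, ... } across the full VL
//
// There is no floating-point form of INDEX, so for float/double the importer
// now leaves CreateSequence unexpanded rather than casting the initial value
// and step to integers as the removed code below did.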
- if (!varTypeIsIntegral(op1)) - { - op1 = gtNewCastNode(TYP_LONG, op1, false, TYP_LONG); - } - if (!varTypeIsIntegral(op2)) - { - op2 = gtNewCastNode(TYP_LONG, op2, false, TYP_LONG); - } - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); - break; - } - case NI_Vector_ToScalar: { op1 = impSIMDPopStack(); @@ -1720,9 +1714,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_get_Indices: { - GenTree* start = gtNewIconNode(0, TYP_INT); - GenTree* step = gtNewIconNode(1, TYP_INT); - retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); + if ((simdBaseJitType != CORINFO_TYPE_FLOAT) && (simdBaseJitType != CORINFO_TYPE_DOUBLE)) + { + GenTree* start = gtNewIconNode(0, TYP_INT); + GenTree* step = gtNewIconNode(1, TYP_INT); + retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); + } break; } case NI_Vector64_get_Indices: From f151c6427729c2b8e70db9da1b174d8ae4e33f27 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 13:44:44 -0700 Subject: [PATCH 112/120] MUL with DuplicateScalarToVector --- src/coreclr/jit/gentree.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a4072df92dd86d..60cff4d74aa1f7 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20996,7 +20996,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, if (broadcastOp != nullptr) { #if defined(TARGET_ARM64) - if (varTypeIsLong(simdBaseType)) + if (isScalable) + { + *broadcastOp = gtNewSimdHWIntrinsicNode(type, *broadcastOp, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + } + else if (varTypeIsLong(simdBaseType)) { // This is handled via emulation and the scalar is consumed directly break; @@ -21007,10 +21011,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, *broadcastOp = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, *broadcastOp, simdBaseJitType, 8); break; } - else if (isScalable) - { - *broadcastOp = gtNewSimdHWIntrinsicNode(type, *broadcastOp, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); - } else #endif // TARGET_ARM64 { From 39374e37393f3d941a50497ae862fc051e42db85 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 14:14:48 -0700 Subject: [PATCH 113/120] fix merge conflict errors --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.cpp | 8 ++++---- src/coreclr/jit/hwintrinsicarm64.cpp | 19 ++++++++++++++++--- src/coreclr/jit/lowerarmarch.cpp | 6 +++--- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 571a2596a22a76..99671696199805 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3158,7 +3158,7 @@ class Compiler var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); #if defined(TARGET_ARM64) - GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType); + GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); GenTree* gtNewSimdFalseMaskByteNode(); #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d659195224400c..97bc5de2cd4714 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22475,7 +22475,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op, GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); op1 = 
gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, simdBaseJitType, simdSize);
- op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+ op2 = gtNewSimdFalseMaskByteNode();
 }
 else
 {
@@ -22519,7 +22519,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,
 GenTree* allTrue = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
 op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, simdBaseJitType,
 simdSize);
- op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+ op2 = gtNewSimdFalseMaskByteNode();
 }
 else
 {
@@ -22676,7 +22676,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
 op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, simdBaseJitType,
 simdSize);
- op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+ op2 = gtNewSimdFalseMaskByteNode();
 }
 else
 {
@@ -22719,7 +22719,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
 op1 = gtNewSimdHWIntrinsicNode(TYP_LONG, allTrue, cmpResult, NI_Sve_GetActiveElementCount, simdBaseJitType,
 simdSize);
- op2 = gtNewSimdAllFalseMaskNode(simdBaseJitType, simdSize);
+ op2 = gtNewSimdFalseMaskByteNode();
 }
 else
 {
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index a7ab3ab093992a..b8f93a33288ed0 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -3907,16 +3907,29 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
 // Return Value:
 // The mask
 //
-GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType)
+GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize)
 {
 // Import as a constant mask
 var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
 GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK);

- // TODO-SVE: For agnostic VL, vector type may not be simd16_t
+ bool found = false;

- bool found = EvaluateSimdPatternToMask<simd16_t>(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll);
+ switch (simdSize)
+ {
+ case 16:
+ found = EvaluateSimdPatternToMask<simd16_t>(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll);
+ break;
+ case 32:
+ found = EvaluateSimdPatternToMask<simd32_t>(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll);
+ break;
+ case 64:
+ found = EvaluateSimdPatternToMask<simd64_t>(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll);
+ break;
+ default:
+ unreached();
+ }

 assert(found);
 return mskCon;
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 69a9884011f5c7..9d890538df1dfa 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -1985,7 +1985,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
 bool foundUse = BlockRange().TryGetUse(node, &use);

- GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType);
+ GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
 GenTree* falseVal = comp->gtNewZeroConNode(simdType);

 var_types nodeType = simdType;
@@ -2098,12 +2098,12 @@ GenTree* Lowering::LowerHWIntrinsicCmpOpVL(GenTreeHWIntrinsic* node, genTreeOps
 GenTree* op = nullptr;
 GenTree* opZero = nullptr;

- if (op1->IsMaskZero())
+ if (op1->IsFalseMask())
 {
 op = op2;
 opZero = op1;
 }
- else if (op2->IsMaskZero())
+ else if (op2->IsFalseMask())
 {
 op = op1;
 opZero = op2;

From 324d241b64354bea2ac645ee65899eb4621b321a Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 19 Jun 2025 
18:17:39 -0700 Subject: [PATCH 114/120] Fix the value numbering --- src/coreclr/jit/valuenum.cpp | 42 +++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index b8dd649d3b2583..ab84e0ded068d4 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2365,7 +2365,7 @@ ValueNum ValueNumStore::VNForSimdType(unsigned simdSize, CorInfoType simdBaseJit bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, ValueNum valVN) { - assert(varTypeIsSIMD(simdType)); + assert(varTypeIsSIMDOrMask(simdType)); simd_t vector = {}; @@ -2408,6 +2408,15 @@ bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, Va } #endif // TARGET_XARCH || TARGET_ARM64 +#if defined(FEATURE_MASKED_HW_INTRINSICS) + case TYP_MASK: + { + simdmask_t tmp = GetConstantSimdMask(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } +#endif // FEATURE_MASKED_HW_INTRINSICS + default: { unreached(); @@ -2474,6 +2483,15 @@ bool ValueNumStore::VNIsVectorNegativeZero(var_types simdType, var_types simdBas } #endif // TARGET_XARCH || TARGET_ARM64 +#if defined(FEATURE_MASKED_HW_INTRINSICS) + case TYP_MASK: + { + simdmask_t tmp = GetConstantSimdMask(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } +#endif // FEATURE_MASKED_HW_INTRINSICS + default: { unreached(); @@ -7809,6 +7827,15 @@ ValueNum EvaluateSimdCvtMaskToVector(ValueNumStore* vns, var_types simdType, var } #endif // TARGET_XARCH || TARGET_ARM64 +#if defined(FEATURE_MASKED_HW_INTRINSICS) + case TYP_MASK: + { + simdmask_t result = {}; + EvaluateSimdCvtMaskToVector(baseType, &result, arg0); + return vns->VNForSimdMaskCon(result); + } +#endif // FEATURE_MASKED_HW_INTRINSICS + default: { unreached(); @@ -8836,8 +8863,17 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary( { if (varTypeIsFloating(baseType)) { - // Handle `(x == NaN) == false` and `(NaN == x) == false` for floating-point types - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + var_types simdType; + + if (varTypeIsMask(TypeOfVN(cnsVN))) + { + simdType = TYP_MASK; + } + else + { + // Handle `(x == NaN) == false` and `(NaN == x) == false` for floating-point types + simdType = Compiler::getSIMDTypeForSize(simdSize); + } if (VNIsVectorNaN(simdType, baseType, cnsVN)) { From fc24657f3fe91e76107ef600f419dbd9af97e5c9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 23:33:36 -0700 Subject: [PATCH 115/120] disable Sve when it is not available --- src/coreclr/jit/compiler.cpp | 12 ++++++++---- src/coreclr/jit/compiler.h | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 27d5070133648d..5458388dbae5ba 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2552,7 +2552,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) * { * compVectorTLength = getTargetLength(); * compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && ((compVectorTLength & - * (compVectorTLength - 1)) == 0); compUseSveForVectorT |= JitConfig.UseSveForVectorT(); + * (compVectorTLength - 1)) == 0); compUseSveForVectorT |= JitConfig.UseSveForVectorT(); * } * else * { @@ -2578,11 +2578,15 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (info.compMatchedVM) { compVectorTLength = info.compCompHnd->getTargetVectorLength(); - compUseSveForVectorT = (compVectorTLength > 16) && 
(compVectorTLength <= 256) && - ((compVectorTLength & (compVectorTLength - 1)) == 0); + + if (compExactlyDependsOn(InstructionSet_Sve_Arm64)) + { + compUseSveForVectorT = (compVectorTLength > 16) && (compVectorTLength <= 256) && + ((compVectorTLength & (compVectorTLength - 1)) == 0); #ifdef DEBUG - compUseSveForVectorT |= (bool)JitConfig.UseSveForVectorT(); + compUseSveForVectorT |= (bool)JitConfig.UseSveForVectorT(); #endif // DEBUG + } } else { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 99671696199805..cc882091e06363 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8978,13 +8978,13 @@ class Compiler FORCEINLINE static bool UseSveForType(var_types type) { return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && - (type != TYP_SIMD8); // ((type == TYP_SIMD32) || (type == TYP_SIMD64)); + (type != TYP_SIMD8) && (type != TYP_SIMD12); } FORCEINLINE static bool UseStrictSveForType(var_types type) { // This method is used in scenarios where we do not know the type of HIR node or how the LIR node was formed. // For such cases, we will generate SVE, only if we are guaranteed to have VL >= 32B. - return UseSveForType(type) && (type != TYP_SIMD16); + return UseSveForType(type) && (type != TYP_SIMD12) && (type != TYP_SIMD16); } FORCEINLINE static bool SizeMatchesVectorTLength(unsigned simdSize) { From a997047cece28cbbb2dedf91f3b167a2843e5cb8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 19 Jun 2025 23:36:37 -0700 Subject: [PATCH 116/120] jit format --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 3 +-- src/coreclr/jit/gentree.cpp | 18 ++++++++--------- src/coreclr/jit/hwintrinsicarm64.cpp | 30 ++++++++++++++-------------- src/coreclr/jit/lowerarmarch.cpp | 2 +- 5 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5458388dbae5ba..9bcae10c464508 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2577,7 +2577,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (info.compMatchedVM) { - compVectorTLength = info.compCompHnd->getTargetVectorLength(); + compVectorTLength = info.compCompHnd->getTargetVectorLength(); if (compExactlyDependsOn(InstructionSet_Sve_Arm64)) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cc882091e06363..49fbce9353c58a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8977,8 +8977,7 @@ class Compiler } FORCEINLINE static bool UseSveForType(var_types type) { - return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && - (type != TYP_SIMD8) && (type != TYP_SIMD12); + return UseSveForVectorT() && varTypeIsSIMDOrMask(type) && (type != TYP_SIMD8) && (type != TYP_SIMD12); } FORCEINLINE static bool UseStrictSveForType(var_types type) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 97bc5de2cd4714..34f360a7596111 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20882,13 +20882,14 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, assert(op2 != nullptr); - bool isOp2SimdType = (genActualType(op2) == genActualType(type)) || (genActualType(op2) == genActualType(simdBaseType)) || - (op2->TypeIs(TYP_SIMD12) && (type == TYP_SIMD16)); + bool isOp2SimdType = (genActualType(op2) == genActualType(type)) || + (genActualType(op2) == genActualType(simdBaseType)) || + (op2->TypeIs(TYP_SIMD12) && (type == TYP_SIMD16)); if ((op == GT_LSH) || (op == GT_RSH) || (op == GT_RSZ)) { bool op2Type 
= genActualType(op2) == TYP_INT; -#if defined (TARGET_ARM64) +#if defined(TARGET_ARM64) op2Type |= (isScalable && isOp2SimdType); #endif assert(op2Type && "op2's type is unexpected."); @@ -21012,7 +21013,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op, #if defined(TARGET_ARM64) if (isScalable) { - *broadcastOp = gtNewSimdHWIntrinsicNode(type, *broadcastOp, NI_Sve_DuplicateScalarToVector, simdBaseJitType, simdSize); + *broadcastOp = gtNewSimdHWIntrinsicNode(type, *broadcastOp, NI_Sve_DuplicateScalarToVector, + simdBaseJitType, simdSize); } else if (varTypeIsLong(simdBaseType)) { @@ -22743,12 +22745,8 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op, return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } -GenTree* Compiler::gtNewSimdCndSelNode(var_types type, - GenTree* op1, - GenTree* op2, - GenTree* op3, - CorInfoType simdBaseJitType, - unsigned simdSize) +GenTree* Compiler::gtNewSimdCndSelNode( + var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index b8f93a33288ed0..fd14eed6f93289 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1347,8 +1347,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // There is no way to do floating point `initial and `step` in SVE, corresponding // to the `Vector.CreateSequence(). - op2 = impPopStack().val; - op1 = impPopStack().val; + op2 = impPopStack().val; + op1 = impPopStack().val; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_Sve_Index, simdBaseJitType, simdSize); } break; @@ -1726,7 +1726,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if ((simdBaseJitType != CORINFO_TYPE_FLOAT) && (simdBaseJitType != CORINFO_TYPE_DOUBLE)) { GenTree* start = gtNewIconNode(0, TYP_INT); - GenTree* step = gtNewIconNode(1, TYP_INT); + GenTree* step = gtNewIconNode(1, TYP_INT); retNode = gtNewSimdHWIntrinsicNode(retType, start, step, NI_Sve_Index, simdBaseJitType, simdSize); } break; @@ -3056,7 +3056,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - //case NI_Vector_WidenLower: + // case NI_Vector_WidenLower: case NI_Vector64_WidenLower: case NI_Vector128_WidenLower: { @@ -3074,7 +3074,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - //case NI_Vector_WidenUpper: + // case NI_Vector_WidenUpper: case NI_Vector64_WidenUpper: case NI_Vector128_WidenUpper: { @@ -3805,18 +3805,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector_op_Subtraction: case NI_Vector_Sum: case NI_Vector_Truncate: - //case NI_Vector_WidenLower: - //case NI_Vector_WidenUpper: - { - if (retNode->OperIsHWIntrinsic()) + // case NI_Vector_WidenLower: + // case NI_Vector_WidenUpper: { - intrinsic = - GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, - retNode->AsHWIntrinsic()->GetHWIntrinsicId()); - retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + if (retNode->OperIsHWIntrinsic()) + { + intrinsic = + GenTreeHWIntrinsic::GetScalableHWIntrinsicId(retType, simdBaseType, + retNode->AsHWIntrinsic()->GetHWIntrinsicId()); + retNode->AsHWIntrinsic()->ChangeHWIntrinsicId(intrinsic); + } + break; } - break; - } case NI_Vector_Add: case NI_Vector_op_Addition: case NI_Vector_AndNot: diff --git a/src/coreclr/jit/lowerarmarch.cpp 
b/src/coreclr/jit/lowerarmarch.cpp index 9d890538df1dfa..4003682a9aa98b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1614,7 +1614,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { #ifdef TARGET_ARM64 - //TODO-VL: Remove this entry because this is not handled properly inside LowerHWIntrinsicCreate + // TODO-VL: Remove this entry because this is not handled properly inside LowerHWIntrinsicCreate case NI_Vector_Create: #endif case NI_Vector64_Create: From f10bb0b1fe18018a01070fbcb83ace8c48ba6b47 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 23 Jun 2025 17:42:54 -0700 Subject: [PATCH 117/120] fix the cmpOpNode return to TYP_MASK --- src/coreclr/jit/gentree.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 34f360a7596111..c9707d6fa22846 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22065,10 +22065,11 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, #if defined(TARGET_ARM64) if (isScalable) { + assert(varTypeIsMask(lookupType)); + if (wrapInCmtv) { // cndsel(result, 0xFF, 0) - assert(varTypeIsMask(lookupType)); GenTree* retNode = gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); GenTree* allOnes = gtNewAllBitsSetConNode(type); GenTree* allZeros = gtNewZeroConNode(Compiler::getSIMDTypeForSize(simdSize)); @@ -22078,7 +22079,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, else { // will be wrapped by GetActiveElementCount - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(lookupType, op1, op2, intrinsic, simdBaseJitType, simdSize); } } else From 49a536ac2c9acd5152767f302765cddbd218dd33 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 23 Jun 2025 22:29:39 -0700 Subject: [PATCH 118/120] fix merge conflict errors --- src/coreclr/jit/gentree.cpp | 114 +++++++++++++++------------ src/coreclr/jit/hwintrinsicarm64.cpp | 27 ++++++- src/coreclr/jit/importercalls.cpp | 5 +- 3 files changed, 89 insertions(+), 57 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8b3ca1c315d9f4..466dda08c7ddb7 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24470,15 +24470,14 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, // Return Value: // The node representing the minimum or maximum operation // -GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, - GenTree* op1, - GenTree* op2, - CorInfoType simdBaseJitType, - unsigned simdSize, - bool isMax, - bool isMagnitude, - bool isNumber - ARM64_ARG(bool isScalable)) +GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isMax, + bool isMagnitude, + bool isNumber ARM64_ARG(bool isScalable)) { assert(op1 != nullptr); assert(op1->TypeIs(type)); @@ -24530,7 +24529,6 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, type = TYP_SIMD16; } else if (op1->IsCnsVec()) - unsigned simdSize ARM64_ARG(bool isScalable)) { cnsNode = op1; otherNode = op2; @@ -24942,7 +24940,7 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, #elif defined(TARGET_ARM64) if (!isMagnitude && !isNumber) { - return gtNewSimdMinMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize, isMax); + return gtNewSimdMinMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize, isMax 
ARM64_ARG(isScalable)); } if (isScalar) @@ -24959,7 +24957,7 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, if (retNode == nullptr) { - // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); + // intrinsic = GenTreeHWIntrinsic::GetScalableHWIntrinsicId(type, intrinsic); GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); @@ -24991,58 +24989,68 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, absOp1Dup = fgMakeMultiUse(&absOp1); absOp2Dup = fgMakeMultiUse(&absOp2); - equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, absOp1, absOp2, simdBaseJitType, simdSize); + equalsMask = + gtNewSimdCmpOpNode(GT_EQ, type, absOp1, absOp2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); if (isMax) { - signMask = gtNewSimdIsPositiveNode(type, op1Dup, simdBaseJitType, simdSize); - cmpMask = gtNewSimdCmpOpNode(GT_GT, type, absOp1Dup, absOp2Dup, simdBaseJitType, simdSize); + signMask = gtNewSimdIsPositiveNode(type, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + cmpMask = gtNewSimdCmpOpNode(GT_GT, type, absOp1Dup, absOp2Dup, simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } else { - signMask = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize); - cmpMask = gtNewSimdCmpOpNode(GT_LT, type, absOp1Dup, absOp2Dup, simdBaseJitType, simdSize); + signMask = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + cmpMask = gtNewSimdCmpOpNode(GT_LT, type, absOp1Dup, absOp2Dup, simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } if (isNumber) { - nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(absOp2Dup), simdBaseJitType, simdSize); + nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(absOp2Dup), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } else { - nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(absOp1Dup), simdBaseJitType, simdSize); + nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(absOp1Dup), simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } } else { - equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize); + equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); if (isMax) { - signMask = gtNewSimdIsNegativeNode(type, op2Dup, simdBaseJitType, simdSize); - cmpMask = gtNewSimdCmpOpNode(GT_LT, type, gtCloneExpr(op2Dup), op1Dup, simdBaseJitType, simdSize); + signMask = gtNewSimdIsNegativeNode(type, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + cmpMask = gtNewSimdCmpOpNode(GT_LT, type, gtCloneExpr(op2Dup), op1Dup, simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } else { - signMask = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize); - cmpMask = gtNewSimdCmpOpNode(GT_LT, type, gtCloneExpr(op1Dup), op2Dup, simdBaseJitType, simdSize); + signMask = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + cmpMask = gtNewSimdCmpOpNode(GT_LT, type, gtCloneExpr(op1Dup), op2Dup, simdBaseJitType, + simdSize ARM64_ARG(isScalable)); } if (isNumber) { - nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(op2Dup), simdBaseJitType, simdSize); + nanMask = + gtNewSimdIsNaNNode(type, gtCloneExpr(op2Dup), simdBaseJitType, simdSize ARM64_ARG(isScalable)); } else { - nanMask = gtNewSimdIsNaNNode(type, gtCloneExpr(op1Dup), simdBaseJitType, simdSize); + nanMask = + gtNewSimdIsNaNNode(type, gtCloneExpr(op1Dup), simdBaseJitType, simdSize ARM64_ARG(isScalable)); } op2Dup = gtCloneExpr(op2Dup); } - GenTree* mask = gtNewSimdBinOpNode(GT_AND, type, equalsMask, signMask, 
simdBaseJitType, simdSize); - mask = gtNewSimdBinOpNode(GT_OR, type, mask, nanMask, simdBaseJitType, simdSize); - mask = gtNewSimdBinOpNode(GT_OR, type, mask, cmpMask, simdBaseJitType, simdSize); + GenTree* mask = + gtNewSimdBinOpNode(GT_AND, type, equalsMask, signMask, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + mask = gtNewSimdBinOpNode(GT_OR, type, mask, nanMask, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + mask = gtNewSimdBinOpNode(GT_OR, type, mask, cmpMask, simdBaseJitType, simdSize ARM64_ARG(isScalable)); retNode = gtNewSimdCndSelNode(type, mask, gtCloneExpr(op1Dup), op2Dup, simdBaseJitType, simdSize); } @@ -25054,12 +25062,9 @@ GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, } return retNode; } + assert(!isScalar); -GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, - GenTree* op1, - GenTree* op2, - CorInfoType simdBaseJitType, - unsigned simdSize ARM64_ARG(bool isScalable)) + if (isMagnitude) { GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); @@ -25070,7 +25075,8 @@ GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, GenTree* absOp1Dup = fgMakeMultiUse(&absOp1); GenTree* absOp2Dup = fgMakeMultiUse(&absOp2); - GenTree* equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, absOp1, absOp2, simdBaseJitType, simdSize); + GenTree* equalsMask = + gtNewSimdCmpOpNode(GT_EQ, type, absOp1, absOp2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); ; GenTree* signMask1 = nullptr; GenTree* signMask2 = nullptr; @@ -25079,29 +25085,31 @@ GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, if (isMax) { - signMask1 = gtNewSimdIsNegativeNode(type, op2Dup, simdBaseJitType, simdSize); - signMask2 = gtNewSimdIsPositiveNode(type, absOp2Dup, simdBaseJitType, simdSize); - signMask3 = gtNewSimdIsNegativeNode(type, absOp1Dup, simdBaseJitType, simdSize); + signMask1 = gtNewSimdIsNegativeNode(type, op2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + signMask2 = gtNewSimdIsPositiveNode(type, absOp2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + signMask3 = gtNewSimdIsNegativeNode(type, absOp1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); cmpMask = gtNewSimdCmpOpNode(GT_GT, type, gtCloneExpr(absOp1Dup), gtCloneExpr(absOp2Dup), simdBaseJitType, - simdSize); + simdSize ARM64_ARG(isScalable)); } else { - signMask1 = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize); - signMask2 = gtNewSimdIsPositiveNode(type, absOp1Dup, simdBaseJitType, simdSize); - signMask3 = gtNewSimdIsNegativeNode(type, absOp2Dup, simdBaseJitType, simdSize); + signMask1 = gtNewSimdIsNegativeNode(type, op1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + signMask2 = gtNewSimdIsPositiveNode(type, absOp1Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + signMask3 = gtNewSimdIsNegativeNode(type, absOp2Dup, simdBaseJitType, simdSize ARM64_ARG(isScalable)); cmpMask = gtNewSimdCmpOpNode(GT_LT, type, gtCloneExpr(absOp1Dup), gtCloneExpr(absOp2Dup), simdBaseJitType, - simdSize); + simdSize ARM64_ARG(isScalable)); } - GenTree* mask1 = gtNewSimdBinOpNode(GT_AND, type, equalsMask, signMask1, simdBaseJitType, simdSize); - GenTree* mask2 = gtNewSimdBinOpNode(GT_AND, type, cmpMask, signMask2, simdBaseJitType, simdSize); - GenTree* mask3 = gtNewSimdBinOpNode(GT_OR, type, mask1, mask2, simdBaseJitType, simdSize); - mask3 = gtNewSimdBinOpNode(GT_OR, type, mask3, signMask3, simdBaseJitType, simdSize); + GenTree* mask1 = + gtNewSimdBinOpNode(GT_AND, type, equalsMask, signMask1, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + GenTree* mask2 = + 
gtNewSimdBinOpNode(GT_AND, type, cmpMask, signMask2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + GenTree* mask3 = gtNewSimdBinOpNode(GT_OR, type, mask1, mask2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); + mask3 = gtNewSimdBinOpNode(GT_OR, type, mask3, signMask3, simdBaseJitType, simdSize ARM64_ARG(isScalable)); return gtNewSimdCndSelNode(type, mask3, gtCloneExpr(op1Dup), gtCloneExpr(op2Dup), simdBaseJitType, simdSize); } - return gtNewSimdMinMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize, isMax); + return gtNewSimdMinMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize, isMax ARM64_ARG(isScalable)); } //------------------------------------------------------------------------ @@ -25124,8 +25132,12 @@ GenTree* Compiler::gtNewSimdMinNativeNode(var_types type, // is most efficient. This means that the exact result returned if either input is // NaN or -0 can differ based on the underlying hardware. // -GenTree* Compiler::gtNewSimdMinMaxNativeNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isMax) +GenTree* Compiler::gtNewSimdMinMaxNativeNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isMax ARM64_ARG(bool isScalable)) { assert(op1 != nullptr); assert(op1->TypeIs(type)); @@ -25359,7 +25371,7 @@ GenTree* Compiler::gtNewSimdMinMaxNativeNode( // op1 = op1 < op2 // -or- // op1 = op1 > op2 - op1 = gtNewSimdCmpOpNode(isMax ? GT_GT : GT_LT, type, op1, op2, simdBaseJitType, simdSize); + op1 = gtNewSimdCmpOpNode(isMax ? GT_GT : GT_LT, type, op1, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); // result = ConditionalSelect(op1, op1Dup, op2Dup) return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 623a23d669d644..2dda9a6da308bf 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -658,6 +658,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, bool isValidScalarIntrinsic = false; #endif + bool isScalable = false; bool isMinMaxIntrinsic = false; bool isMax = false; bool isMagnitude = false; @@ -2252,6 +2253,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_Max: + { + isScalable = true; + FALLTHROUGH; + } // case NI_Vector_MaxNumber: case NI_Vector64_Max: case NI_Vector128_Max: @@ -2281,6 +2286,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_Vector_MaxNative: + { + isScalable = true; + FALLTHROUGH; + } case NI_Vector64_MaxNative: case NI_Vector128_MaxNative: { @@ -2293,7 +2302,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_MaxNumber: case NI_Vector128_MaxNumber: { -s isMinMaxIntrinsic = true; + isMinMaxIntrinsic = true; isMax = true; isNumber = true; //{ @@ -2305,7 +2314,11 @@ s isMinMaxIntrinsic = true; } case NI_Vector_Min: - // case NI_Vector_MinNumber: + // case NI_Vector_MinNumber: + { + isScalable = true; + FALLTHROUGH; + } case NI_Vector64_Min: case NI_Vector128_Min: { @@ -2332,6 +2345,10 @@ s isMinMaxIntrinsic = true; } case NI_Vector_MinNative: + { + isScalable = true; + FALLTHROUGH; + } case NI_Vector64_MinNative: case NI_Vector128_MinNative: { @@ -3810,11 +3827,13 @@ s isMinMaxIntrinsic = true; if (isNative) { assert(!isMagnitude && !isNumber); - retNode = gtNewSimdMinMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize, isMax); + retNode = + 
gtNewSimdMinMaxNativeNode(retType, op1, op2, simdBaseJitType, simdSize, isMax ARM64_ARG(isScalable)); } else { - retNode = gtNewSimdMinMaxNode(retType, op1, op2, simdBaseJitType, simdSize, isMax, isMagnitude, isNumber); + retNode = gtNewSimdMinMaxNode(retType, op1, op2, simdBaseJitType, simdSize, isMax, isMagnitude, + isNumber ARM64_ARG(isScalable)); } } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index c0f38fe1f95d43..7414552eb168b7 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -4952,11 +4952,12 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, if (isNative) { assert(!isMagnitude && !isNumber); - retNode = gtNewSimdMinMaxNativeNode(callType, op1, op2, callJitType, 0, isMax); + retNode = gtNewSimdMinMaxNativeNode(callType, op1, op2, callJitType, 0, isMax ARM64_ARG(false)); } else { - retNode = gtNewSimdMinMaxNode(callType, op1, op2, callJitType, 0, isMax, isMagnitude, isNumber); + retNode = gtNewSimdMinMaxNode(callType, op1, op2, callJitType, 0, isMax, isMagnitude, + isNumber ARM64_ARG(false)); } #endif // FEATURE_HW_INTRINSICS From 61ed25f05a442a81a5a834356ad1b11050704e76 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 24 Jun 2025 13:57:15 -0700 Subject: [PATCH 119/120] fix merge conflicts --- src/coreclr/jit/gentree.cpp | 32 ++++++++++++++--------------- src/coreclr/jit/gentree.h | 10 ++++----- src/coreclr/jit/morph.cpp | 41 ++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 48092caf8695ca..695a02d2c97e4c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -30957,16 +30957,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, // Returns: // The intrinsic ID based on the oper, base type, and simd size // -NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar, - bool reverseCond - ARM64_ARG(bool isScalable)) +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar, + bool reverseCond ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); @@ -31348,13 +31347,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, // type doesn't match with the type IR wants us to be producing. For example, the consuming node // may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK. 
// -var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - var_types simdBaseType, - unsigned simdSize, - bool reverseCond - ARM64_ARG(bool isScalable)) +var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + var_types simdBaseType, + unsigned simdSize, + bool reverseCond ARM64_ARG(bool isScalable)) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 0cf3878c918e49..3740baed6eb473 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6647,17 +6647,15 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic GenTree* op2, var_types simdBaseType, unsigned simdSize, - bool isScalar, - bool reverseCond = false - ARM64_ARG(bool isScalable)); + bool isScalar ARM64_ARG(bool isScalable), + bool reverseCond = false); static var_types GetLookupTypeForCmpOp(Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, - unsigned simdSize, - bool reverseCond = false - ARM64_ARG(bool isScalable)); + unsigned simdSize ARM64_ARG(bool isScalable), + bool reverseCond = false); static genTreeOps GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index dc9d128e68e8bf..000f94c2100f6a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9559,7 +9559,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool NamedIntrinsic subIntrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_SUB, op2, op1, simdBaseType, simdSize, - isScalar); + isScalar ARM64_ARG(isScalable)); node->ChangeHWIntrinsicId(subIntrinsic, op2, op1); return fgMorphHWIntrinsicRequired(node); @@ -9591,7 +9591,8 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool DEBUG_DESTROY_NODE(op2); DEBUG_DESTROY_NODE(node); - node = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize)->AsHWIntrinsic(); + node = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize ARM64_ARG(isScalable)) + ->AsHWIntrinsic(); #if defined(TARGET_XARCH) if (varTypeIsFloating(simdBaseType)) @@ -9633,7 +9634,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool NamedIntrinsic subIntrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_SUB, op1, op2, simdBaseType, simdSize, - isScalar); + isScalar ARM64_ARG(isScalable)); node->ChangeHWIntrinsicId(subIntrinsic, op1, op2); return fgMorphHWIntrinsicRequired(node); @@ -9966,7 +9967,8 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool } NamedIntrinsic addIntrinsic = - GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_ADD, op1, op2, simdBaseType, simdSize, isScalar); + GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_ADD, op1, op2, simdBaseType, simdSize, + isScalar ARM64_ARG(isScalable)); node->ChangeHWIntrinsicId(addIntrinsic, op1, op2); return fgMorphHWIntrinsicRequired(node); @@ -11529,9 +11531,9 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) var_types simdBaseType = tree->GetSimdBaseType(); unsigned simdSize = tree->GetSimdSize(); #if defined(TARGET_ARM64) - bool isScalable = - (((FIRST_NI_Vector <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Vector)) || - ((FIRST_NI_Sve <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() 
<= LAST_NI_Sve))); + bool isScalable = + (((FIRST_NI_Vector <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Vector)) || + ((FIRST_NI_Sve <= tree->GetHWIntrinsicId()) && (tree->GetHWIntrinsicId() <= LAST_NI_Sve))); #endif bool isScalar = false; @@ -11559,11 +11561,12 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) { // Move constant vectors from op1 to op2 for comparison operations // Noting that we can't handle scalar operations since they can copy upper bits from op1 - genTreeOps newOper = GenTree::SwapRelop(oper); - var_types lookupType = - GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, simdSize); - NamedIntrinsic newId = GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, newOper, lookupType, op2, op1, - simdBaseType, simdSize, isScalar); + genTreeOps newOper = GenTree::SwapRelop(oper); + var_types lookupType = GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, newOper, retType, simdBaseType, + simdSize ARM64_ARG(isScalable)); + NamedIntrinsic newId = + GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, newOper, lookupType, op2, op1, simdBaseType, + simdSize, isScalar ARM64_ARG(isScalable)); if (newId != NI_Illegal) { @@ -11610,7 +11613,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) NamedIntrinsic addIntrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_ADD, op1, op2, simdBaseType, simdSize, - isScalar); + isScalar ARM64_ARG(isScalable)); tree->ChangeHWIntrinsicId(addIntrinsic, op1, op2); return fgMorphHWIntrinsicRequired(tree); @@ -11621,7 +11624,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) { #if defined(TARGET_ARM64) // xarch doesn't have a native GT_NEG representation for integers and itself uses (Zero - v1) - op2 = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize); + op2 = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); DEBUG_DESTROY_NODE(op1); DEBUG_DESTROY_NODE(tree); @@ -11631,7 +11634,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) } else { - op2 = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize); + op2 = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize ARM64_ARG(isScalable)); #if defined(TARGET_XARCH) if (varTypeIsFloating(simdBaseType)) @@ -11646,7 +11649,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) NamedIntrinsic addIntrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(this, GT_ADD, op2, op1, simdBaseType, simdSize, - isScalar); + isScalar ARM64_ARG(isScalable)); tree->ChangeHWIntrinsicId(addIntrinsic, op2, op1); @@ -11672,7 +11675,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) if (op2->IsVectorAllBitsSet()) { // xarch doesn't have a native GT_NOT representation and itself uses (v1 ^ AllBitsSet) - op1 = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); + op1 = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize ARM64_ARG(isScalable)); DEBUG_DESTROY_NODE(op2); DEBUG_DESTROY_NODE(tree); @@ -11683,7 +11686,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) if (varTypeIsFloating(simdBaseType) && op2->IsVectorNegativeZero(simdBaseType)) { // xarch doesn't have a native GT_NEG representation for floating-point and itself uses (v1 ^ -0.0) - op1 = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize); + op1 = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize 
ARM64_ARG(isScalable)); DEBUG_DESTROY_NODE(op2); DEBUG_DESTROY_NODE(tree); @@ -11702,7 +11705,7 @@ GenTree* Compiler::fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree) if (opts.OptimizationEnabled()) { - return fgOptimizeHWIntrinsic(tree); + return fgOptimizeHWIntrinsic(tree ARM64_ARG(isScalable)); } return tree; } From 7f88033009d1b80bdc860e9ead1343b2dae4b7aa Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jun 2025 00:03:18 -0700 Subject: [PATCH 120/120] fix parameter ordering because of bad merge conflict resolution --- src/coreclr/jit/gentree.cpp | 30 +++++++++++++++--------------- src/coreclr/jit/morph.cpp | 8 ++++---- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 695a02d2c97e4c..8b836957021b02 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -30957,15 +30957,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, // Returns: // The intrinsic ID based on the oper, base type, and simd size // -NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isScalar, - bool reverseCond ARM64_ARG(bool isScalable)) +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isScalar ARM64_ARG(bool isScalable), + bool reverseCond) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); @@ -31347,12 +31347,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* com // type doesn't match with the type IR wants us to be producing. For example, the consuming node // may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK. // -var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(Compiler* comp, - genTreeOps oper, - var_types type, - var_types simdBaseType, - unsigned simdSize, - bool reverseCond ARM64_ARG(bool isScalable)) +var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(Compiler* comp, + genTreeOps oper, + var_types type, + var_types simdBaseType, + unsigned simdSize ARM64_ARG(bool isScalable), + bool reverseCond) { var_types simdType = comp->getSIMDTypeForSize(simdSize); assert(varTypeIsMask(type) || (type == simdType)); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 000f94c2100f6a..fd8d417642ab40 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9874,12 +9874,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node ARM64_ARG(bool const bool reverseCond = true; var_types lookupType = - GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op1Oper, op1RetType, op1SimdBaseType, op1SimdSize, - reverseCond); + GenTreeHWIntrinsic::GetLookupTypeForCmpOp(this, op1Oper, op1RetType, op1SimdBaseType, + op1SimdSize ARM64_ARG(isScalable), reverseCond); NamedIntrinsic newId = GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(this, op1Oper, lookupType, cmpOp1, cmpOp2, - op1SimdBaseType, op1SimdSize, op1IsScalar, - reverseCond); + op1SimdBaseType, op1SimdSize, + op1IsScalar ARM64_ARG(isScalable), reverseCond); if (newId != NI_Illegal) {
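A note on the ARM64_ARG plumbing threaded through patches 117-120: the macro's definition is not shown anywhere in this series, but its call sites (e.g. `unsigned simdSize ARM64_ARG(bool isScalable)`, with no comma before the macro) imply that it expands to a comma-prefixed trailing parameter on ARM64 and to nothing on other targets. A minimal sketch of that assumed definition:

    // Hypothetical reconstruction from the call sites above; the real
    // definition lives elsewhere in the JIT headers and may differ.
    #ifdef TARGET_ARM64
    #define ARM64_ARG(x) , x // splices ", x" into a parameter/argument list
    #else
    #define ARM64_ARG(x)     // vanishes on non-ARM64 targets
    #endif

    // With that definition, a declaration in the style of the patched helpers:
    //
    //   GenTree* gtNewSimdUnOpNode(genTreeOps op, var_types type, GenTree* op1,
    //                              CorInfoType simdBaseJitType,
    //                              unsigned simdSize ARM64_ARG(bool isScalable));
    //
    // compiles to a six-parameter function on ARM64 and a five-parameter one
    // elsewhere, so non-ARM64 callers never see or pass the isScalable flag.

This also explains the parameter reordering in the final patch: C++ requires defaulted parameters to trail all non-defaulted ones, so the conditional `isScalable` parameter must be spliced in ahead of `bool reverseCond = false` in the gentree.h declarations, and the gentree.cpp definitions are reordered to match.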