diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d63fa0d036d69d..a03afd93a9759f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2338,50 +2338,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre break; } - - case GT_CNS_MSK: - { - GenTreeMskCon* mask = tree->AsMskCon(); - emitter* emit = GetEmitter(); - - // Try every type until a match is found - - if (mask->IsZero()) - { - emit->emitInsSve_R(INS_sve_pfalse, EA_SCALABLE, targetReg, INS_OPTS_SCALABLE_B); - break; - } - - insOpts opt = INS_OPTS_SCALABLE_B; - SveMaskPattern pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_BYTE, mask->gtSimdMaskVal); - - if (pat == SveMaskPatternNone) - { - opt = INS_OPTS_SCALABLE_H; - pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_SHORT, mask->gtSimdMaskVal); - } - - if (pat == SveMaskPatternNone) - { - opt = INS_OPTS_SCALABLE_S; - pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_INT, mask->gtSimdMaskVal); - } - - if (pat == SveMaskPatternNone) - { - opt = INS_OPTS_SCALABLE_D; - pat = EvaluateSimdMaskToPattern<simd16_t>(TYP_LONG, mask->gtSimdMaskVal); - } - - // Should only ever create constant masks for valid patterns. - if (pat == SveMaskPatternNone) - { - unreached(); - } - - emit->emitIns_R_PATTERN(INS_sve_ptrue, EA_SCALABLE, targetReg, opt, (insSvePattern)pat); - break; - } #endif // FEATURE_SIMD default: diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c418dcf0bde19f..90871de7c93730 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3146,8 +3146,8 @@ class Compiler var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); #if defined(TARGET_ARM64) - GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType); - GenTree* gtNewSimdFalseMaskByteNode(); + GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize); #endif GenTree* gtNewSimdBinOpNode(genTreeOps op, @@ -3707,7 +3707,6 @@ class Compiler #if defined(FEATURE_HW_INTRINSICS) GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree); - GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon); #endif // FEATURE_HW_INTRINSICS // Options to control behavior of gtTryRemoveBoxUpstreamEffects diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 22d59567e4d654..6a9ca955e03243 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21945,8 +21945,8 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, #if defined(TARGET_XARCH) return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512_ConvertVectorToMask, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - // ConvertVectorToMask uses cmpne which requires an embedded mask. - GenTree* trueMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_ConversionTrueMask, simdBaseJitType, simdSize); + // We use cmpne which requires an embedded mask.
+ GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, op1, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize); #else #error Unsupported platform @@ -32028,7 +32028,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #if defined(FEATURE_MASKED_HW_INTRINSICS) - // Fold ConvertMaskToVector(ConvertVectorToMask(vec)) to vec if (tree->OperIsConvertMaskToVector()) { GenTree* op = op1; @@ -32061,7 +32060,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } } - // Fold ConvertVectorToMask(ConvertMaskToVector(mask)) to mask if (tree->OperIsConvertVectorToMask()) { GenTree* op = op1; @@ -32070,9 +32068,11 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) #if defined(TARGET_XARCH) tryHandle = op->OperIsHWIntrinsic(); #elif defined(TARGET_ARM64) - assert(op->OperIsHWIntrinsic(NI_Sve_ConversionTrueMask)); - op = op2; - tryHandle = op->OperIsHWIntrinsic(); + if (op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll)) + { + op = op2; + tryHandle = op->OperIsHWIntrinsic(); + } #endif // TARGET_ARM64 if (tryHandle) @@ -32158,12 +32158,53 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) resultNode = gtNewVconNode(retType, &simdVal); } -#if defined(TARGET_XARCH) else if (tree->OperIsConvertVectorToMask()) { - resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon()); - } + GenTreeVecCon* vecCon = cnsNode->AsVecCon(); + GenTreeMskCon* mskCon = gtNewMskConNode(retType); + + switch (vecCon->TypeGet()) + { + case TYP_SIMD8: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val); + break; + } + + case TYP_SIMD12: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val); + break; + } + + case TYP_SIMD16: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val); + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val); + break; + } + + case TYP_SIMD64: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); + break; + } #endif // TARGET_XARCH + + default: + { + unreached(); + } + } + + resultNode = mskCon; + } #endif // FEATURE_MASKED_HW_INTRINSICS else { @@ -33006,10 +33047,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) switch (ni) { #ifdef TARGET_ARM64 - case NI_Sve_ConvertVectorToMask: - resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon()); - break; - case NI_AdvSimd_MultiplyByScalar: case NI_AdvSimd_Arm64_MultiplyByScalar: { @@ -33151,18 +33188,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } -#if defined(TARGET_ARM64) - if (ni == NI_Sve_ConditionalSelect) - { - assert(!op1->IsVectorAllBitsSet() && !op1->IsVectorZero()); - } - else - { - assert(!op1->IsTrueMask(simdBaseType) && !op1->IsFalseMask()); - } -#endif - - if (op1->IsVectorAllBitsSet() || op1->IsTrueMask(simdBaseType)) + if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet()) { if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0) { @@ -33176,7 +33202,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) return op2; } - if (op1->IsVectorZero() || op1->IsFalseMask()) + if (op1->IsVectorZero()) { return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT); } @@ -33228,70 +33254,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } return resultNode; } - 
-//------------------------------------------------------------------------------ -// gtFoldExprConvertVecCnsToMask: Folds a constant vector plus conversion to -// mask into a constant mask. -// -// Arguments: -// tree - The convert vector to mask node -// vecCon - The vector constant converted by the convert -// -// Return Value: -// Returns a constant mask -// -GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon) -{ - assert(tree->OperIsConvertVectorToMask()); - assert(vecCon == tree->Op(1) || vecCon == tree->Op(2)); - - var_types retType = tree->TypeGet(); - var_types simdBaseType = tree->GetSimdBaseType(); - GenTreeMskCon* mskCon = gtNewMskConNode(retType); - - switch (vecCon->TypeGet()) - { - case TYP_SIMD8: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val); - break; - } - - case TYP_SIMD12: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val); - break; - } - - case TYP_SIMD16: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val); - break; - } - -#if defined(TARGET_XARCH) - case TYP_SIMD32: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val); - break; - } - - case TYP_SIMD64: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); - break; - } -#endif // TARGET_XARCH - - default: - { - unreached(); - } - } - - return mskCon; -} - #endif // FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index c5d49fbacfca3a..5e425db7271d93 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1802,8 +1802,8 @@ struct GenTree inline bool IsVectorCreate() const; inline bool IsVectorAllBitsSet() const; inline bool IsVectorBroadcast(var_types simdBaseType) const; - inline bool IsTrueMask(var_types simdBaseType) const; - inline bool IsFalseMask() const; + inline bool IsMaskAllBitsSet() const; + inline bool IsMaskZero() const; inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType); @@ -9550,46 +9550,54 @@ inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const return false; } -//------------------------------------------------------------------------ -// IsTrueMask: Is the given node a true mask -// -// Arguments: -// simdBaseType - the base type of the mask -// -// Returns true if the node is a true mask for the given simdBaseType. -// -// Note that a byte true mask (1111...) is different to an int true mask -// (10001000...), therefore the simdBaseType of the mask needs to be -// taken into account. 
-// -inline bool GenTree::IsTrueMask(var_types simdBaseType) const +inline bool GenTree::IsMaskAllBitsSet() const { #ifdef TARGET_ARM64 - // TODO-SVE: For agnostic VL, vector type may not be simd16_t + static_assert_no_msg(AreContiguous(NI_Sve_CreateTrueMaskByte, NI_Sve_CreateTrueMaskDouble, + NI_Sve_CreateTrueMaskInt16, NI_Sve_CreateTrueMaskInt32, + NI_Sve_CreateTrueMaskInt64, NI_Sve_CreateTrueMaskSByte, + NI_Sve_CreateTrueMaskSingle, NI_Sve_CreateTrueMaskUInt16, + NI_Sve_CreateTrueMaskUInt32, NI_Sve_CreateTrueMaskUInt64)); - if (IsCnsMsk()) + if (OperIsHWIntrinsic()) { - return SveMaskPatternAll == EvaluateSimdMaskToPattern<simd16_t>(simdBaseType, AsMskCon()->gtSimdMaskVal); + NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); + if (id == NI_Sve_ConvertMaskToVector) + { + GenTree* op1 = AsHWIntrinsic()->Op(1); + assert(op1->OperIsHWIntrinsic()); + id = op1->AsHWIntrinsic()->GetHWIntrinsicId(); + } + return ((id == NI_Sve_CreateTrueMaskAll) || + ((id >= NI_Sve_CreateTrueMaskByte) && (id <= NI_Sve_CreateTrueMaskUInt64))); } -#endif +#endif return false; } -//------------------------------------------------------------------------ -// IsFalseMask: Is the given node a false mask -// -// Returns true if the node is a false mask, ie all zeros -// -inline bool GenTree::IsFalseMask() const +inline bool GenTree::IsMaskZero() const { #ifdef TARGET_ARM64 - if (IsCnsMsk()) + static_assert_no_msg(AreContiguous(NI_Sve_CreateFalseMaskByte, NI_Sve_CreateFalseMaskDouble, + NI_Sve_CreateFalseMaskInt16, NI_Sve_CreateFalseMaskInt32, + NI_Sve_CreateFalseMaskInt64, NI_Sve_CreateFalseMaskSByte, + NI_Sve_CreateFalseMaskSingle, NI_Sve_CreateFalseMaskUInt16, + NI_Sve_CreateFalseMaskUInt32, NI_Sve_CreateFalseMaskUInt64)); + + if (OperIsHWIntrinsic()) { - return AsMskCon()->IsZero(); + NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); + if (id == NI_Sve_ConvertMaskToVector) + { + GenTree* op1 = AsHWIntrinsic()->Op(1); + assert(op1->OperIsHWIntrinsic()); + id = op1->AsHWIntrinsic()->GetHWIntrinsicId(); + } + return ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64)); } -#endif +#endif return false; } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index fbb9b984bd4e06..85cb1b8b95ba2a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2462,14 +2462,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig R2RARG(entryPoint), simdBaseJitType, nodeRetType, simdSize, mustExpand); - -#if defined(FEATURE_MASKED_HW_INTRINSICS) && defined(TARGET_ARM64) - if (retNode != nullptr) - { - // The special import may have switched the type of the node. - nodeRetType = retNode->gtType; - } -#endif } if (setMethodHandle && (retNode != nullptr)) @@ -2542,10 +2534,18 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } - if (nodeRetType == TYP_MASK) + if (retType != nodeRetType) { // HWIntrinsic returns a mask, but all returns must be vectors, so convert mask to vector.
- retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); + assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); + assert(nodeRetType == TYP_MASK); + + GenTreeHWIntrinsic* op = retNode->AsHWIntrinsic(); + + CorInfoType simdBaseJitType = op->GetSimdBaseJitType(); + unsigned simdSize = op->GetSimdSize(); + + retNode = gtNewSimdCvtMaskToVectorNode(retType, op, simdBaseJitType, simdSize); } #endif // FEATURE_MASKED_HW_INTRINSICS && TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c82a568c3b33a9..05a8304e9e1f9c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2822,56 +2822,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Sve_CreateFalseMaskByte: - case NI_Sve_CreateFalseMaskDouble: - case NI_Sve_CreateFalseMaskInt16: - case NI_Sve_CreateFalseMaskInt32: - case NI_Sve_CreateFalseMaskInt64: - case NI_Sve_CreateFalseMaskSByte: - case NI_Sve_CreateFalseMaskSingle: - case NI_Sve_CreateFalseMaskUInt16: - case NI_Sve_CreateFalseMaskUInt32: - case NI_Sve_CreateFalseMaskUInt64: - { - // Import as a constant vector 0 - GenTreeVecCon* vecCon = gtNewVconNode(retType); - vecCon->gtSimdVal = simd_t::Zero(); - retNode = vecCon; - break; - } - - case NI_Sve_CreateTrueMaskByte: - case NI_Sve_CreateTrueMaskDouble: - case NI_Sve_CreateTrueMaskInt16: - case NI_Sve_CreateTrueMaskInt32: - case NI_Sve_CreateTrueMaskInt64: - case NI_Sve_CreateTrueMaskSByte: - case NI_Sve_CreateTrueMaskSingle: - case NI_Sve_CreateTrueMaskUInt16: - case NI_Sve_CreateTrueMaskUInt32: - case NI_Sve_CreateTrueMaskUInt64: - { - assert(sig->numArgs == 1); - op1 = impPopStack().val; - - // Where possible, import a constant mask to allow for optimisations. 
- if (op1->IsIntegralConst()) - { - int64_t pattern = op1->AsIntConCommon()->IntegralValue(); - simd_t simdVal; - - if (EvaluateSimdPatternToVector(simdBaseType, &simdVal, (SveMaskPattern)pattern)) - { - retNode = gtNewVconNode(retType, &simdVal); - break; - } - } - - // Was not able to generate a pattern; instead, import a truemaskall - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, intrinsic, simdBaseJitType, simdSize); - break; - } - case NI_Sve_Load2xVectorAndUnzip: case NI_Sve_Load3xVectorAndUnzip: case NI_Sve_Load4xVectorAndUnzip: @@ -3413,41 +3363,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdAllTrueMaskNode: Create a mask with all bits set to true +// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true // // Arguments: // simdBaseJitType -- the base jit type of the nodes being masked +// simdSize -- the simd size of the nodes being masked // // Return Value: // The mask // -GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType) +GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize) { - // Import as a constant mask - - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); - - // TODO-SVE: For agnostic VL, vector type may not be simd16_t - - bool found = EvaluateSimdPatternToMask<simd16_t>(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll); - assert(found); - - return mskCon; + return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); } //------------------------------------------------------------------------ -// gtNewSimdFalseMaskByteNode: Create a mask with all bits set to false +// gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false +// +// Arguments: +// simdSize -- the simd size of the nodes being masked // // Return Value: // The mask // -GenTree* Compiler::gtNewSimdFalseMaskByteNode() +GenTree* Compiler::gtNewSimdFalseMaskByteNode(unsigned simdSize) { - // Import as a constant mask 0 - GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); - mskCon->gtSimdMaskVal = simdmask_t::Zero(); - return mskCon; + return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index ca38c26ab7c845..678b01727711c3 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -512,7 +512,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // destination using /Z.
assert((targetReg != embMaskOp2Reg) || (embMaskOp1Reg == embMaskOp2Reg)); - assert(intrin.op3->isContained() || !intrin.op1->IsTrueMask(node->GetSimdBaseType())); + assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); } else @@ -610,7 +610,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(intrin.op3->IsVectorZero()); - if (intrin.op1->isContained() || intrin.op1->IsTrueMask(node->GetSimdBaseType())) + if (intrin.op1->isContained() || intrin.op1->IsMaskAllBitsSet()) { // We already skip importing ConditionalSelect if op1 == trueAll, however // if we still see it here, it is because we wrapped the predicated instruction @@ -2031,7 +2031,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Sve_ConversionTrueMask: + case NI_Sve_CreateTrueMaskAll: // Must use the pattern variant, as the non-pattern variant is SVE2.1. GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index bf13fc05b21104..be4235adadcb8e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -63,28 +63,28 @@ HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, {INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, {INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, {INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse,
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateMaskForFirstActiveElement, -1, 2, {INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, CreateMaskForNextActiveElement, -1, 2, {INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) @@ -350,8 +350,7 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) -// True mask only used inside a ConvertVectorToMask -HARDWARE_INTRINSIC(Sve, ConversionTrueMask, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) // Scalar variants of Saturating*By*BitElementCount. There are no 8bit versions as the generic version is scalar only.
HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 1d69b329e760bd..81d35e2519d37c 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1813,16 +1813,13 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_FusedMultiplyAddScalar: LowerHWIntrinsicFusedMultiplyAddScalar(node); break; - case NI_Sve_ConditionalSelect: return LowerHWIntrinsicCndSel(node); - case NI_Sve_SetFfr: { StoreFFRValue(node); break; } - case NI_Sve_GetFfrByte: case NI_Sve_GetFfrInt16: case NI_Sve_GetFfrInt32: @@ -1971,7 +1968,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) var_types simdType = Compiler::getSIMDTypeForSize(simdSize); bool foundUse = BlockRange().TryGetUse(node, &use); - GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType); + GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); GenTree* falseVal = comp->gtNewZeroConNode(simdType); var_types nodeType = simdType; @@ -3942,12 +3939,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op3 = intrin.op3; // Handle op1 - if (op1->IsFalseMask()) + if (op1->IsVectorZero()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. 
MakeSrcContained(node, op1); - LABELEDDISPTREERANGE("Contained false mask op1 in ConditionalSelect", BlockRange(), op1); } // Handle op2 @@ -3957,15 +3953,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic()) { - bool contain = false; uint32_t maskSize = genTypeSize(node->GetSimdBaseType()); uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); - if (maskSize == operSize) { // If the size of baseType of operation matches that of maskType, then contain // the operation - contain = true; + MakeSrcContained(node, op2); + op2->MakeEmbMaskOp(); } else { @@ -3984,16 +3979,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) uint32_t auxSize = genTypeSize(embOp->GetAuxiliaryType()); if (maskSize == auxSize) { - contain = true; + MakeSrcContained(node, op2); + op2->MakeEmbMaskOp(); } } - - if (contain) - { - MakeSrcContained(node, op2); - op2->MakeEmbMaskOp(); - LABELEDDISPTREERANGE("Contained op2 in ConditionalSelect", BlockRange(), node); - } } // Handle intrinsics with embedded masks and immediate operands @@ -4004,19 +3993,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (embOp->Op(2)->IsCnsIntOrI()) { MakeSrcContained(op2, embOp->Op(2)); - LABELEDDISPTREERANGE("Contained ShiftRight in ConditionalSelect", BlockRange(), op2); } } } // Handle op3 - if (op3->IsVectorZero() && op1->IsTrueMask(node->GetSimdBaseType()) && op2->IsEmbMaskOp()) + if (op3->IsVectorZero() && op1->IsMaskAllBitsSet()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. // Do this only if op1 was AllTrueMask MakeSrcContained(node, op3); - LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3); } break; @@ -4133,14 +4120,13 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this // optimisation when the nestedOp is a reduce operation. - if (nestedOp1->IsTrueMask(cndSelNode->GetSimdBaseType()) && - !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && + if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) { GenTree* nestedOp2 = nestedCndSel->Op(2); GenTree* nestedOp3 = nestedCndSel->Op(3); - LABELEDDISPTREERANGE("Removed nested conditionalselect (before)", BlockRange(), cndSelNode); + LABELEDDISPTREERANGE("Removed nested conditionalselect (before):", BlockRange(), cndSelNode); // Transform: // @@ -4158,7 +4144,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) } } } - else if (op1->IsTrueMask(cndSelNode->GetSimdBaseType())) + else if (op1->IsMaskAllBitsSet()) { // Any case where op2 is not an embedded HWIntrinsic if (!op2->OperIsHWIntrinsic() || diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1b15e34a7d8bf6..29beafdb321e85 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9727,10 +9727,7 @@ GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* n return node; } #elif defined(TARGET_ARM64) - // TODO-SVE: This optimisation is too naive. It needs to calculate the full cost of the instruction - // vs using the predicate version, taking into account all input arguments and all uses - // of the result.
- // - return fgMorphTryUseAllMaskVariant(node); + return fgMorphTryUseAllMaskVariant(node); #else #error Unsupported platform #endif @@ -9785,7 +9782,7 @@ GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* else if (node->IsVectorZero()) { // Morph the vector of zeroes into mask of zeroes. - GenTree* mask = gtNewSimdFalseMaskByteNode(); + GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); mask->SetMorphed(this); return mask; } diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 9841bdeb38c93c..d0450fa91caff6 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -1526,7 +1526,7 @@ void EvaluateSimdCvtMaskToVector(TSimd* result, simdmask_t arg0) isSet = ((mask >> i) & 1) != 0; #elif defined(TARGET_ARM64) // For Arm64 we have count total bits to read, but - // they are sizeof(TBase) bits apart. We set + // they are sizeof(TBase) bits apart. We still set // the result element to AllBitsSet or Zero depending // on the corresponding mask bit @@ -1598,17 +1598,14 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0) uint32_t count = sizeof(TSimd) / sizeof(TBase); uint64_t mask = 0; - TBase significantBit = 1; -#if defined(TARGET_XARCH) - significantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1); -#endif + TBase mostSignificantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1); for (uint32_t i = 0; i < count; i++) { TBase input0; memcpy(&input0, &arg0.u8[i * sizeof(TBase)], sizeof(TBase)); - if ((input0 & significantBit) != 0) + if ((input0 & mostSignificantBit) != 0) { #if defined(TARGET_XARCH) // For xarch we have count sequential bits to write @@ -1618,9 +1615,9 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0) mask |= static_cast<uint64_t>(1) << i; #elif defined(TARGET_ARM64) // For Arm64 we have count total bits to write, but - // they are sizeof(TBase) bits apart. We set + // they are sizeof(TBase) bits apart. We still set // depending on if the corresponding input element - // has its least significant bit set + // has its most significant bit set mask |= static_cast<uint64_t>(1) << (i * sizeof(TBase)); #else @@ -1673,328 +1670,6 @@ void EvaluateSimdCvtVectorToMask(var_types baseType, simdmask_t* result, TSimd a } } } - -#if defined(TARGET_ARM64) - -enum SveMaskPattern -{ - SveMaskPatternLargestPowerOf2 = 0, // The largest power of 2. - SveMaskPatternVectorCount1 = 1, // Exactly 1 element. - SveMaskPatternVectorCount2 = 2, // Exactly 2 elements. - SveMaskPatternVectorCount3 = 3, // Exactly 3 elements. - SveMaskPatternVectorCount4 = 4, // Exactly 4 elements. - SveMaskPatternVectorCount5 = 5, // Exactly 5 elements. - SveMaskPatternVectorCount6 = 6, // Exactly 6 elements. - SveMaskPatternVectorCount7 = 7, // Exactly 7 elements. - SveMaskPatternVectorCount8 = 8, // Exactly 8 elements. - SveMaskPatternVectorCount16 = 9, // Exactly 16 elements. - SveMaskPatternVectorCount32 = 10, // Exactly 32 elements. - SveMaskPatternVectorCount64 = 11, // Exactly 64 elements. - SveMaskPatternVectorCount128 = 12, // Exactly 128 elements. - SveMaskPatternVectorCount256 = 13, // Exactly 256 elements. - SveMaskPatternLargestMultipleOf4 = 29, // The largest multiple of 4. - SveMaskPatternLargestMultipleOf3 = 30, // The largest multiple of 3. - SveMaskPatternAll = 31, // All available (implicitly a multiple of two).
- SveMaskPatternNone = 14 // Invalid -}; - -template <typename TSimd, typename TBase> -bool EvaluateSimdPatternToMask(simdmask_t* result, SveMaskPattern pattern) -{ - uint32_t count = sizeof(TSimd) / sizeof(TBase); - uint32_t finalOne = count + 1; - uint64_t mask = 0; - - switch (pattern) - { - case SveMaskPatternLargestPowerOf2: - case SveMaskPatternAll: - finalOne = count; - break; - - case SveMaskPatternVectorCount1: - case SveMaskPatternVectorCount2: - case SveMaskPatternVectorCount3: - case SveMaskPatternVectorCount4: - case SveMaskPatternVectorCount5: - case SveMaskPatternVectorCount6: - case SveMaskPatternVectorCount7: - case SveMaskPatternVectorCount8: - finalOne = pattern - SveMaskPatternVectorCount1 + 1; - break; - - case SveMaskPatternVectorCount16: - case SveMaskPatternVectorCount32: - case SveMaskPatternVectorCount64: - case SveMaskPatternVectorCount128: - case SveMaskPatternVectorCount256: - finalOne = std::min(uint32_t(16 << (pattern - SveMaskPatternVectorCount16)), count); - break; - - case SveMaskPatternLargestMultipleOf4: - finalOne = (count - (count % 4)); - break; - - case SveMaskPatternLargestMultipleOf3: - finalOne = (count - (count % 3)); - break; - - default: - return false; - } - assert(finalOne <= count); - assert(finalOne > 0); - - // Write finalOne number of bits - for (uint32_t i = 0; i < finalOne; i++) - { - mask |= static_cast<uint64_t>(1) << (i * sizeof(TBase)); - } - - memcpy(&result->u8[0], &mask, sizeof(uint64_t)); - return true; -} - -template <typename TSimd> -bool EvaluateSimdPatternToMask(var_types baseType, simdmask_t* result, SveMaskPattern pattern) -{ - switch (baseType) - { - case TYP_FLOAT: - case TYP_INT: - case TYP_UINT: - { - return EvaluateSimdPatternToMask<TSimd, uint32_t>(result, pattern); - } - - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - { - return EvaluateSimdPatternToMask<TSimd, uint64_t>(result, pattern); - } - - case TYP_BYTE: - case TYP_UBYTE: - { - return EvaluateSimdPatternToMask<TSimd, uint8_t>(result, pattern); - } - - case TYP_SHORT: - case TYP_USHORT: - { - return EvaluateSimdPatternToMask<TSimd, uint16_t>(result, pattern); - } - - default: - { - unreached(); - } - } -} - -template <typename TSimd, typename TBase> -bool EvaluateSimdPatternToVector(simd_t* result, SveMaskPattern pattern) -{ - uint32_t count = sizeof(TSimd) / sizeof(TBase); - uint32_t finalOne = count + 1; - - switch (pattern) - { - case SveMaskPatternLargestPowerOf2: - case SveMaskPatternAll: - finalOne = count; - break; - - case SveMaskPatternVectorCount1: - case SveMaskPatternVectorCount2: - case SveMaskPatternVectorCount3: - case SveMaskPatternVectorCount4: - case SveMaskPatternVectorCount5: - case SveMaskPatternVectorCount6: - case SveMaskPatternVectorCount7: - case SveMaskPatternVectorCount8: - finalOne = std::min(uint32_t(pattern - SveMaskPatternVectorCount1 + 1), count); - break; - - case SveMaskPatternVectorCount16: - case SveMaskPatternVectorCount32: - case SveMaskPatternVectorCount64: - case SveMaskPatternVectorCount128: - case SveMaskPatternVectorCount256: - finalOne = std::min(uint32_t(16 << (pattern - SveMaskPatternVectorCount16)), count); - break; - - case SveMaskPatternLargestMultipleOf4: - finalOne = (count - (count % 4)); - break; - - case SveMaskPatternLargestMultipleOf3: - finalOne = (count - (count % 3)); - break; - - default: - return false; - } - assert(finalOne <= count); - assert(finalOne > 0); - - // Write finalOne number of entries - for (uint32_t i = 0; i < count; i++) - { - TBase output; - - if (i < finalOne) - { - memset(&output, 0xFF, sizeof(TBase)); - } - else - { - memset(&output, 0x00, sizeof(TBase)); - } - - memcpy(&result->u8[i * sizeof(TBase)], &output, sizeof(TBase)); - }
- - return true; -} - -template <typename TSimd> -bool EvaluateSimdPatternToVector(var_types baseType, TSimd* result, SveMaskPattern pattern) -{ - switch (baseType) - { - case TYP_FLOAT: - case TYP_INT: - case TYP_UINT: - { - return EvaluateSimdPatternToVector<TSimd, uint32_t>(result, pattern); - } - - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - { - return EvaluateSimdPatternToVector<TSimd, uint64_t>(result, pattern); - } - - case TYP_BYTE: - case TYP_UBYTE: - { - return EvaluateSimdPatternToVector<TSimd, uint8_t>(result, pattern); - } - - case TYP_SHORT: - case TYP_USHORT: - { - return EvaluateSimdPatternToVector<TSimd, uint16_t>(result, pattern); - } - - default: - { - unreached(); - } - } -} - -template <typename TSimd, typename TBase> -SveMaskPattern EvaluateSimdMaskToPattern(simdmask_t arg0) -{ - uint32_t count = sizeof(TSimd) / sizeof(TBase); - - uint64_t mask; - memcpy(&mask, &arg0.u8[0], sizeof(uint64_t)); - uint32_t finalOne = count; - - // A mask pattern starts with zero or more 1s and then the rest of the mask is filled with 0s. - - // Find an unbroken sequence of 1s. - for (uint32_t i = 0; i < count; i++) - { - // For Arm64 we have count total bits to read, but - // they are sizeof(TBase) bits apart. We set - // the result element to AllBitsSet or Zero depending - // on the corresponding mask bit - - bool isSet = ((mask >> (i * sizeof(TBase))) & 1) != 0; - if (!isSet) - { - finalOne = i; - break; - } - } - - // Find an unbroken sequence of 0s. - for (uint32_t i = finalOne; i < count; i++) - { - // For Arm64 we have count total bits to read, but - // they are sizeof(TBase) bits apart. We set - // the result element to AllBitsSet or Zero depending - // on the corresponding mask bit - - bool isSet = ((mask >> (i * sizeof(TBase))) & 1) != 0; - if (isSet) - { - // Invalid sequence - return SveMaskPatternNone; - } - } - - if (finalOne == count) - { - return SveMaskPatternAll; - } - else if (finalOne >= SveMaskPatternVectorCount1 && finalOne <= SveMaskPatternVectorCount8) - { - return (SveMaskPattern)finalOne; - } - else - { - // TODO: Add other patterns as required. These probably won't be seen until we get
- return SveMaskPatternNone; - } -} - -template -SveMaskPattern EvaluateSimdMaskToPattern(var_types baseType, simdmask_t arg0) -{ - switch (baseType) - { - case TYP_FLOAT: - case TYP_INT: - case TYP_UINT: - { - return EvaluateSimdMaskToPattern(arg0); - } - - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - { - return EvaluateSimdMaskToPattern(arg0); - } - - case TYP_BYTE: - case TYP_UBYTE: - { - return EvaluateSimdMaskToPattern(arg0); - } - - case TYP_SHORT: - case TYP_USHORT: - { - return EvaluateSimdMaskToPattern(arg0); - } - - default: - { - unreached(); - } - } -} -#endif // TARGET_ARM64 - #endif // FEATURE_MASKED_HW_INTRINSICS #ifdef FEATURE_SIMD diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template index db2416974cdfd4..f5364238d58e01 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template @@ -25,7 +25,7 @@ namespace JIT.HardwareIntrinsics.Arm [Fact] public static void {TestName}() { - var test = new LoadNonFaultingUnaryOpTest__{TestName}(); + var test = new LoadUnaryOpTest__{TestName}(); if (test.IsSupported) { @@ -66,7 +66,7 @@ namespace JIT.HardwareIntrinsics.Arm } } - public sealed unsafe class LoadNonFaultingUnaryOpTest__{TestName} + public sealed unsafe class LoadUnaryOpTest__{TestName} { private struct DataTable { @@ -134,7 +134,7 @@ namespace JIT.HardwareIntrinsics.Arm return testStruct; } - public void RunStructFldScenario(LoadNonFaultingUnaryOpTest__{TestName} testClass) + public void RunStructFldScenario(LoadUnaryOpTest__{TestName} testClass) { var result = {Isa}.{Method}(({Op1BaseType}*)testClass._dataTable.inArray1Ptr); @@ -158,7 +158,7 @@ namespace JIT.HardwareIntrinsics.Arm private DataTable _dataTable; - public LoadNonFaultingUnaryOpTest__{TestName}() + public LoadUnaryOpTest__{TestName}() { Succeeded = true; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template index 829f9384c33610..6bec8d9481000a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template @@ -24,7 +24,7 @@ namespace JIT.HardwareIntrinsics.Arm [Fact] public static void {TestName}() { - var test = new LoadVectorMaskTest__{TestName}(); + var test = new LoadUnaryOpTest__{TestName}(); if (test.IsSupported) { @@ -56,7 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm } } - public sealed unsafe class LoadVectorMaskTest__{TestName} + public sealed unsafe class LoadUnaryOpTest__{TestName} { private struct DataTable { @@ -121,7 +121,7 @@ namespace JIT.HardwareIntrinsics.Arm return testStruct; } - public void RunStructFldScenario(LoadVectorMaskTest__{TestName} testClass) + public void RunStructFldScenario(LoadUnaryOpTest__{TestName} testClass) { {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); @@ -148,7 +148,7 @@ namespace JIT.HardwareIntrinsics.Arm private DataTable _dataTable; - public LoadVectorMaskTest__{TestName}() + public LoadUnaryOpTest__{TestName}() { Succeeded = true; diff --git a/src/tests/JIT/opt/SVE/ConstantMasks.cs b/src/tests/JIT/opt/SVE/ConstantMasks.cs deleted file mode 100644 index 078e60e9b55411..00000000000000 --- a/src/tests/JIT/opt/SVE/ConstantMasks.cs +++ /dev/null @@ -1,232 
+0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// Unit tests for the masks conversion optimization -// Uses vectors as masks and vice versa. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Threading; -using Xunit; - -public class ConstantMasks -{ - [MethodImpl(MethodImplOptions.NoInlining)] - private static void Consume<T>(T value) { } - - [Fact] - public static void TestEntryPoint() - { - if (Sve.IsSupported) - { - Vector<int> op1 = Vector.Create<int>(11); - Vector<int> op2 = Vector.Create<int>(22); - Vector<int> op3 = Vector.Create<int>(33); - Vector<long> opl1 = Vector.Create<long>(44); - Vector<long> opl2 = Vector.Create<long>(55); - - CndSelectEmbedded(op1, op2, op3); - CndSelectEmbeddedFalseMask(op1, op2); - CndSelectEmbeddedZero(op1, op2); - CndSelectEmbeddedTrueMask(op1, op2); - CndSelectEmbeddedAllBits(op1, op2); - - CndSelectOptionalEmbedded(op1, op2, op3); - CndSelectOptionalEmbeddedFalseMask(op1, op2); - CndSelectOptionalEmbeddedZero(op1, op2); - CndSelectOptionalEmbeddedTrueMask(op1, op2); - CndSelectOptionalEmbeddedAllBits(op1, op2); - - CndSelectEmbeddedOneOp(op1, op2); - CndSelectEmbeddedOneOpFalseMask(op1, op2); - CndSelectEmbeddedOneOpZero(op1, op2); - CndSelectEmbeddedOneOpTrueMask(op1); - CndSelectEmbeddedOneOpAllBits(op1); - - CndSelectEmbeddedReduction(opl1, op2, opl2); - CndSelectEmbeddedReductionFalseMask(op1, opl1); - CndSelectEmbeddedReductionZero(op1, opl1); - CndSelectEmbeddedReductionTrueMask(op1, opl1); - CndSelectEmbeddedReductionAllBits(op1, opl1); - } - } - - // SVE operation (with embedded mask) inside a conditional select - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbedded(Vector<int> mask, Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedFalseMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), op2); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedZero(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), op2); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedTrueMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedAllBits(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result =
Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(op1, op2), op1); - Consume(result); - } - - - // SVE operation (with optional embedded mask) inside a conditional select - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectOptionalEmbedded(Vector<int> mask, Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectOptionalEmbeddedFalseMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), op2); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectOptionalEmbeddedZero(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), op2); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectOptionalEmbeddedTrueMask(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectOptionalEmbeddedAllBits(Vector<int> op1, Vector<int> op2) { - //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), op1); - Consume(result); - } - - - // SVE one op operation (with embedded mask) inside a conditional select - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedOneOp(Vector<int> mask, Vector<int> op1) { - //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(mask, Sve.Abs(op1), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedOneOpFalseMask(Vector<int> dummy, Vector<int> op1) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Abs(op1), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedOneOpZero(Vector<int> dummy, Vector<int> op1) { - //ARM64-FULL-LINE: mov v0.16b, v1.16b - Vector<int> result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Abs(op1), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedOneOpTrueMask(Vector<int> op1) { - //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s - //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Abs(op1), op1); - Consume(result); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void CndSelectEmbeddedOneOpAllBits(Vector<int> op1) { - //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s - //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s - //ARM64-FULL-LINE-NEXT: movz {{.*}} - Vector<int> result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Abs(op1), op1); - Consume(result); - } - - - // SVE reduction operation (with embedded mask) inside a conditional select. - // The op and conditional select cannot be combined into one instruction.
-
-    // SVE reduction operation (with embedded mask) inside a conditional select.
-    // The op and conditional select cannot be combined into one instruction.
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReduction(Vector<long> mask, Vector<int> op1, Vector<long> opf) {
-        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
-        //ARM64-FULL-LINE-NEXT: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
-        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), opf);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionFalseMask(Vector<int> op1, Vector<long> opf) {
-        //ARM64-FULL-LINE: mov v0.16b, v1.16b
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), opf);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionZero(Vector<int> op1, Vector<long> opf) {
-        //ARM64-FULL-LINE: mov v0.16b, v1.16b
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), opf);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionTrueMask(Vector<int> op1, Vector<long> opf) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), opf);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionAllBits(Vector<int> op1, Vector<long> opf) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), opf);
-        Consume(result);
-    }
-
-}
diff --git a/src/tests/JIT/opt/SVE/ConstantMasks.csproj b/src/tests/JIT/opt/SVE/ConstantMasks.csproj
deleted file mode 100644
index 5482afbaa21aa8..00000000000000
--- a/src/tests/JIT/opt/SVE/ConstantMasks.csproj
+++ /dev/null
@@ -1,19 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-  <PropertyGroup>
-    <!-- Needed for the disasm checks -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
-  </PropertyGroup>
-  <PropertyGroup>
-    <DebugType>None</DebugType>
-    <Optimize>True</Optimize>
-    <NoWarn>$(NoWarn),SYSLIB5003</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup>
-    <!-- Verify the generated disassembly -->
-    <HasDisasmCheck>true</HasDisasmCheck>
-  </PropertyGroup>
-  <ItemGroup>
-    <Compile Include="$(MSBuildProjectName).cs" />
-  </ItemGroup>
-</Project>
-
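AddAcross is the interesting case in the file above: it lowers to saddv, a cross-lane reduction that writes a single scalar into a d register, so there is no per-lane predicated form for the select to merge into and a variable mask keeps a separate sel instruction. A sketch of the shape being tested (helper name hypothetical):

    static Vector<long> ReduceThenSelect(Vector<long> mask, Vector<int> op1, Vector<long> fallback)
    {
        Vector<long> sum = Sve.AddAcross(op1);             // saddv: all lanes collapse to one scalar
        return Sve.ConditionalSelect(mask, sum, fallback); // sel survives as its own instruction
    }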
diff --git a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs
deleted file mode 100644
index ba23ebe08f07c9..00000000000000
--- a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs
+++ /dev/null
@@ -1,309 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// Unit tests for the masks conversion optimization
-// Uses vectors as masks and vice versa.
-
-using System;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.Arm;
-using System.Threading;
-using Xunit;
-
-public class ConstantMasks
-{
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    private static void Consume<T>(T value) { }
-
-    [Fact]
-    public static void TestEntryPoint()
-    {
-        if (Sve.IsSupported)
-        {
-            Vector<int> op1 = Vector.Create<int>(11);
-            Vector<int> op2 = Vector.Create<int>(22);
-            Vector<int> op3 = Vector.Create<int>(33);
-            Vector<long> opl1 = Vector.Create<long>(44);
-            Vector<long> opl2 = Vector.Create<long>(55);
-
-            CndSelectEmbeddedF(op1, op2, op3);
-            CndSelectEmbeddedZ(op1, op2, op3);
-            CndSelectEmbeddedFalseMaskF(op1, op2);
-            CndSelectEmbeddedFalseMaskZ(op1, op2);
-            CndSelectEmbeddedZeroF(op1, op2);
-            CndSelectEmbeddedZeroZ(op1, op2);
-            CndSelectEmbeddedTrueMaskF(op1, op2);
-            CndSelectEmbeddedTrueMaskZ(op1, op2);
-            CndSelectEmbeddedAllBitsF(op1, op2);
-            CndSelectEmbeddedAllBitsZ(op1, op2);
-
-            CndSelectOptionalEmbeddedF(op1, op2, op3);
-            CndSelectOptionalEmbeddedZ(op1, op2, op3);
-            CndSelectOptionalEmbeddedFalseMaskF(op1, op2);
-            CndSelectOptionalEmbeddedFalseMaskZ(op1, op2);
-            CndSelectOptionalEmbeddedZeroF(op1, op2);
-            CndSelectOptionalEmbeddedZeroZ(op1, op2);
-            CndSelectOptionalEmbeddedTrueMaskF(op1, op2);
-            CndSelectOptionalEmbeddedTrueMaskZ(op1, op2);
-            CndSelectOptionalEmbeddedAllBitsF(op1, op2);
-            CndSelectOptionalEmbeddedAllBitsZ(op1, op2);
-
-            CndSelectEmbeddedReductionF(opl1, op2);
-            CndSelectEmbeddedReductionZ(opl1, op2);
-            CndSelectEmbeddedReductionFalseMaskF(op1);
-            CndSelectEmbeddedReductionFalseMaskZ(op1);
-            CndSelectEmbeddedReductionZeroF(op1);
-            CndSelectEmbeddedReductionZeroZ(op1);
-            CndSelectEmbeddedReductionTrueMaskF(op1);
-            CndSelectEmbeddedReductionTrueMaskZ(op1);
-            CndSelectEmbeddedReductionAllBitsF(op1);
-            CndSelectEmbeddedReductionAllBitsZ(op1);
-        }
-    }
-
-    // SVE operation (with embedded mask) inside a conditional select
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedF(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedZ(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedFalseMaskF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedFalseMaskZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedZeroF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
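The F/Z suffix pairs in this file fix the select's fallback operand to Sve.CreateFalseMask* or Vector<T>.Zero respectively; both are all-zero values, so whenever the mask is also false the entire expression constant-folds to zero, which is the single movi these checks expect. Sketch (helper name hypothetical):

    static Vector<int> FoldsToZero(Vector<int> a, Vector<int> b)
    {
        // Expected: movi {{v[0-9]+}}.4s, #0 ; the sabd is dropped entirely.
        return Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(a, b), Vector<int>.Zero);
    }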
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedZeroZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedTrueMaskF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedTrueMaskZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedAllBitsF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: mvni {{v[0-9]+}}.4s, #0
-        //ARM64-FULL-LINE-NEXT: cmpne {{p[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s, #0
-        //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedAllBitsZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: mvni {{v[0-9]+}}.4s, #0
-        //ARM64-FULL-LINE-NEXT: cmpne {{p[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s, #0
-        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
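Unlike CreateTrueMask*, Vector<int>.AllBitsSet is an ordinary vector constant, so before it can govern sabd it is materialized (mvni inverts an immediate of zero into all ones) and then converted lane-wise into a predicate, mask[i] = (vec[i] != 0), which is the cmpne against #0 in the AllBits checks above. Sketch of the conversion being exercised (helper name hypothetical):

    static Vector<int> VectorUsedAsMask(Vector<int> a, Vector<int> b)
    {
        // roughly: mvni v.4s, #0 ; cmpne p.s, p/z, z.s, #0 ; sabd z.s, p/m, z.s, z.s
        return Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(a, b), Vector<int>.Zero);
    }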
-
-    // SVE operation (with optional embedded mask) inside a conditional select
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedF(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedZ(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedFalseMaskF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedFalseMaskZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedZeroF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedZeroZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
-        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedTrueMaskF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedTrueMaskZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedAllBitsF(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectOptionalEmbeddedAllBitsZ(Vector<int> op1, Vector<int> op2) {
-        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
-        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), Vector<int>.Zero);
-        Consume(result);
-    }
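Add differs from AbsoluteDifference in having an unpredicated z-register encoding, so once the mask is known all-true the select and the predicate disappear together and the checks expect a bare add z, z, z with no ptrue at all. Sketch (helper name hypothetical):

    static Vector<int> UnpredicatedAdd(Vector<int> a, Vector<int> b)
    {
        // Expected: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
        return Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(a, b), Vector<int>.Zero);
    }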
-
-    // SVE reduction operation (with embedded mask) inside a conditional select.
-    // The op and conditional select cannot be combined into one instruction.
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionF(Vector<long> mask, Vector<int> op1) {
-        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movi {{v[0-9]+}}.4s, #0
-        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
-        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionZ(Vector<long> mask, Vector<int> op1) {
-        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
-        //ARM64-FULL-LINE-NEXT: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movi {{v[0-9]+}}.4s, #0
-        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
-        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), Vector<long>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionFalseMaskF(Vector<int> op1) {
-        //ARM64-FULL-LINE: movi v0.4s, #0
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionFalseMaskZ(Vector<int> op1) {
-        //ARM64-FULL-LINE: movi v0.4s, #0
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), Vector<long>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionZeroF(Vector<int> op1) {
-        //ARM64-FULL-LINE: movi v0.4s, #0
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionZeroZ(Vector<int> op1) {
-        //ARM64-FULL-LINE: movi v0.4s, #0
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), Vector<long>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionTrueMaskF(Vector<int> op1) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionTrueMaskZ(Vector<int> op1) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), Vector<long>.Zero);
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionAllBitsF(Vector<int> op1) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
-        Consume(result);
-    }
-
-    [MethodImpl(MethodImplOptions.NoInlining)]
-    static void CndSelectEmbeddedReductionAllBitsZ(Vector<int> op1) {
-        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
-        //ARM64-FULL-LINE-NEXT: movz {{.*}}
-        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), Vector<long>.Zero);
-        Consume(result);
-    }
-}
diff --git a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj
deleted file mode 100644
index 5482afbaa21aa8..00000000000000
--- a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj
+++ /dev/null
@@ -1,19 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-  <PropertyGroup>
-    <!-- Needed for the disasm checks -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
-  </PropertyGroup>
-  <PropertyGroup>
-    <DebugType>None</DebugType>
-    <Optimize>True</Optimize>
-    <NoWarn>$(NoWarn),SYSLIB5003</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup>
-    <!-- Verify the generated disassembly -->
-    <HasDisasmCheck>true</HasDisasmCheck>
-  </PropertyGroup>
-  <ItemGroup>
-    <Compile Include="$(MSBuildProjectName).cs" />
-  </ItemGroup>
-</Project>
-
diff --git a/src/tests/JIT/opt/SVE/PredicateInstructions.cs b/src/tests/JIT/opt/SVE/PredicateInstructions.cs
index b1336674f1638b..41b09c1fad3898 100644
--- a/src/tests/JIT/opt/SVE/PredicateInstructions.cs
+++ b/src/tests/JIT/opt/SVE/PredicateInstructions.cs
@@ -35,48 +35,56 @@ public static void TestPredicateInstructions()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> ZipLow()
     {
+        //ARM64-FULL-LINE: zip1 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.ZipLow(Vector<short>.Zero, Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<uint> ZipHigh()
     {
+        //ARM64-FULL-LINE: zip2 {{p[0-9]+}}.s, {{p[0-9]+}}.s, {{p[0-9]+}}.s
         return Sve.ZipHigh(Sve.CreateTrueMaskUInt32(), Sve.CreateTrueMaskUInt32());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<sbyte> UnzipEven()
     {
+        //ARM64-FULL-LINE: uzp1 {{p[0-9]+}}.b, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.UnzipEven(Sve.CreateTrueMaskSByte(), Vector<sbyte>.Zero);
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> UnzipOdd()
     {
+        //ARM64-FULL-LINE: uzp2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.UnzipOdd(Sve.CreateTrueMaskInt16(), Sve.CreateFalseMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<long> TransposeEven()
     {
+        //ARM64-FULL-LINE: trn1 {{p[0-9]+}}.d, {{p[0-9]+}}.d, {{p[0-9]+}}.d
         return Sve.TransposeEven(Sve.CreateFalseMaskInt64(), Sve.CreateTrueMaskInt64());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> TransposeOdd()
     {
+        //ARM64-FULL-LINE: trn2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.TransposeOdd(Vector<short>.Zero, Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> ReverseElement()
     {
+        //ARM64-FULL-LINE: rev {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.ReverseElement(Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> And()
     {
+        //ARM64-FULL-LINE: and {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt16(),
             Sve.And(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
@@ -87,6 +95,7 @@ static Vector<short> And()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> BitwiseClear()
     {
+        //ARM64-FULL-LINE: bic {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateFalseMaskInt16(),
             Sve.BitwiseClear(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
@@ -97,6 +106,7 @@ static Vector<short> BitwiseClear()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<int> Xor()
     {
+        //ARM64-FULL-LINE: eor {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt32(),
             Sve.Xor(Sve.CreateTrueMaskInt32(), Sve.CreateTrueMaskInt32()),
@@ -107,6 +117,7 @@ static Vector<int> Xor()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> Or()
     {
+        //ARM64-FULL-LINE: orr {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt16(),
             Sve.Or(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
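The checks added to PredicateInstructions.cs pin the predicate-register forms: when every operand of a zip/unzip/transpose/rev or a logical op is a mask, the computation should stay in p registers rather than round-tripping through z registers. For the logical ops, the surrounding ConditionalSelect supplies the governing predicate of the zeroing p-form, as in this sketch (helper name hypothetical):

    static Vector<short> PredicatedAnd()
    {
        // Expected: and {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
        return Sve.ConditionalSelect(
            Sve.CreateTrueMaskInt16(),
            Sve.And(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
            Vector<short>.Zero);
    }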
@@ -117,6 +128,7 @@ static Vector<short> Or()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<int> ConditionalSelect()
     {
+        //ARM64-FULL-LINE: sel {{p[0-9]+}}.b, {{p[0-9]+}}, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Vector<int>.Zero,
             Sve.CreateFalseMaskInt32(),