From 8000044cfebf8f4887ee0469a6f0ae468ba19e27 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 28 Jan 2024 09:32:27 -0800 Subject: [PATCH 1/5] Update constant prop to only consider certain hwintrinsics --- src/coreclr/jit/assertionprop.cpp | 140 +++++++++++++++++++++++-- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/gentree.h | 6 +- src/coreclr/jit/hwintrinsic.h | 10 +- src/coreclr/jit/hwintrinsiclistarm64.h | 26 ++--- src/coreclr/jit/hwintrinsiclistxarch.h | 22 ++-- src/coreclr/jit/lowerxarch.cpp | 13 +-- src/coreclr/jit/valuenum.cpp | 55 +++++++++- 8 files changed, 231 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 195e28ee845ed..c0e1a8e15757c 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -2747,7 +2747,7 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab // the relop will evaluate to "true" or "false" statically, then the side-effects // will be put into new statements, presuming the JTrue will be folded away. // -GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) +GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, GenTree* tree) { if (tree->OperGet() == GT_JTRUE) { @@ -2995,7 +2995,7 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) if (conValTree != nullptr) { - if (!optIsProfitableToSubstitute(tree, block, conValTree)) + if (!optIsProfitableToSubstitute(tree, block, stmt, conValTree)) { // Not profitable to substitute return nullptr; @@ -3029,12 +3029,13 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* tree) // Arguments: // dest - destination to substitute value to // destBlock - Basic block of destination +// destStmt - Statement of destination // value - value we plan to substitute // // Returns: // False if it's likely not profitable to do substitution, True otherwise // -bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* value) +bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, Statement* destStmt, GenTree* value) { // Giving up on these kinds of handles demonstrated size improvements if (value->IsIconHandle(GTF_ICON_STATIC_HDL, GTF_ICON_CLASS_HDL)) @@ -3044,15 +3045,139 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, // A simple heuristic: If the constant is defined outside of a loop (not far from its head) // and is used inside it - don't propagate. - + // // TODO: Extend on more kinds of trees - if (!value->OperIs(GT_CNS_VEC, GT_CNS_DBL) || !dest->OperIs(GT_LCL_VAR)) + + if (!dest->OperIs(GT_LCL_VAR)) { return true; } const GenTreeLclVar* lcl = dest->AsLclVar(); + if (value->IsCnsVec()) + { +#if defined(FEATURE_HW_INTRINSICS) + GenTreeVecCon* vecCon = value->AsVecCon(); + + FindLinkData linkData = gtFindLink(destStmt, dest); + noway_assert(linkData.result != nullptr); + + if ((linkData.parent != nullptr) && linkData.parent->OperIsHWIntrinsic()) + { + GenTreeHWIntrinsic* parent = linkData.parent->AsHWIntrinsic(); + NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId(); + var_types simdBaseType = parent->GetSimdBaseType(); + + if (!HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId)) + { + // Many hwintrinsics can't benefit from constant prop because they don't support + // constant folding nor do they support any specialized encodings. So, we want to + // skip constant prop and preserve any user-defined locals in that scenario. + return false; + } + + switch (intrinsicId) + { +#if defined(TARGET_ARM64) + case NI_Vector64_op_Equality: + case NI_Vector64_op_Inequality: +#endif // TARGET_ARM64 + case NI_Vector128_op_Equality: + case NI_Vector128_op_Inequality: +#if defined(TARGET_XARCH) + case NI_Vector256_op_Equality: + case NI_Vector256_op_Inequality: + case NI_Vector512_op_Equality: + case NI_Vector512_op_Inequality: +#endif // TARGET_XARCH + { + // We can optimize when the constant is zero, but only + // for non floating-point since +0.0 == -0.0 + + if (!vecCon->IsZero() || varTypeIsFloating(simdBaseType)) + { + return false; + } + break; + } + +#if defined(TARGET_ARM64) + case NI_AdvSimd_CompareEqual: + case NI_AdvSimd_Arm64_CompareEqual: + case NI_AdvSimd_Arm64_CompareEqualScalar: + { + // We can optimize when the constant is zero due to a + // specialized encoding for the instruction + + if (!vecCon->IsZero()) + { + return false; + } + break; + } + + case NI_AdvSimd_CompareGreaterThan: + case NI_AdvSimd_CompareGreaterThanOrEqual: + case NI_AdvSimd_Arm64_CompareGreaterThan: + case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual: + case NI_AdvSimd_Arm64_CompareGreaterThanScalar: + case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar: + { + // We can optimize when the constant is zero, but only + // for signed types, due to a specialized encoding for + // the instruction + + if (!vecCon->IsZero() || varTypeIsUnsigned(simdBaseType)) + { + return false; + } + break; + } +#endif // TARGET_ARM64 + +#if defined(TARGET_XARCH) + case NI_SSE2_Insert: + case NI_SSE41_Insert: + case NI_SSE41_X64_Insert: + { + // We can optimize for float when the constant is zero + // due to a specialized encoding for the instruction + + if ((simdBaseType != TYP_FLOAT) || !vecCon->IsZero()) + { + return false; + } + break; + } + + case NI_AVX512F_CompareEqualMask: + case NI_AVX512F_CompareNotEqualMask: + { + // We can optimize when the constant is zero, but only + // for non floating-point since +0.0 == -0.0 + + if (!vecCon->IsZero() || varTypeIsFloating(simdBaseType)) + { + return false; + } + break; + } +#endif // TARGET_XARCH + + default: + { + break; + } + } + } +#endif // FEATURE_HW_INTRINSICS + } + else if (!value->IsCnsFltOrDbl()) + { + return true; + } + gtPrepareCost(value); if ((value->GetCostEx() > 1) && (value->GetCostSz() > 1)) @@ -6030,7 +6155,7 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta case GT_INTRINSIC: #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: -#endif +#endif // FEATURE_HW_INTRINSICS case GT_ARR_LENGTH: break; @@ -6078,7 +6203,8 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta } // Perform the constant propagation - GenTree* newTree = optVNConstantPropOnTree(block, tree); + GenTree* newTree = optVNConstantPropOnTree(block, stmt, tree); + if (newTree == nullptr) { // Not propagated, keep going. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 0225e62c96c8a..562854e9f1e3f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7697,7 +7697,7 @@ class Compiler void optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree); fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree); GenTree* optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test); - GenTree* optVNConstantPropOnTree(BasicBlock* block, GenTree* tree); + GenTree* optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, GenTree* tree); GenTree* optExtractSideEffListFromConst(GenTree* tree); AssertionIndex GetAssertionCount() @@ -7783,7 +7783,7 @@ class Compiler GenTree* optConstantAssertionProp(AssertionDsc* curAssertion, GenTreeLclVarCommon* tree, Statement* stmt DEBUGARG(AssertionIndex index)); - bool optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* value); + bool optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, Statement* destStmt, GenTree* value); bool optZeroObjAssertionProp(GenTree* tree, ASSERT_VALARG_TP assertions); // Assertion propagation functions. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 46cecfecb60b9..82a905c69a32f 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -884,7 +884,7 @@ struct GenTree bool isContainedVecImmed() const { - return isContained() && OperIs(GT_CNS_VEC); + return isContained() && IsCnsVec(); } bool isLclField() const @@ -9060,7 +9060,7 @@ inline bool GenTree::IsVectorCreate() const inline bool GenTree::IsVectorAllBitsSet() const { #ifdef FEATURE_SIMD - if (OperIs(GT_CNS_VEC)) + if (IsCnsVec()) { return AsVecCon()->IsAllBitsSet(); } @@ -9078,7 +9078,7 @@ inline bool GenTree::IsVectorAllBitsSet() const inline bool GenTree::IsVectorConst() { #ifdef FEATURE_SIMD - if (OperIs(GT_CNS_VEC)) + if (IsCnsVec()) { return true; } diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index dcd5c86129b74..c4bed5182f31a 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -205,8 +205,10 @@ enum HWIntrinsicFlag : unsigned int HW_Flag_EmbBroadcastCompatible = 0x8000000, // The intrinsic is an embedded rounding compatible intrinsic - HW_Flag_EmbRoundingCompatible = 0x10000000 + HW_Flag_EmbRoundingCompatible = 0x10000000, #endif // TARGET_XARCH + + HW_Flag_CanBenefitFromConstantProp = 0x80000000, }; #if defined(TARGET_XARCH) @@ -613,6 +615,12 @@ struct HWIntrinsicInfo } #endif // TARGET_XARCH + static bool CanBenefitFromConstantProp(NamedIntrinsic id) + { + HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_CanBenefitFromConstantProp) != 0; + } + static bool IsMaybeCommutative(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index cb4c8269d6117..89543fc14b753 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -81,9 +81,9 @@ HARDWARE_INTRINSIC(Vector64, op_Addition, HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -190,9 +190,9 @@ HARDWARE_INTRINSIC(Vector128, op_Addition, HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -258,9 +258,9 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, true, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, true, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, true, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, true, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, true, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, true, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, true, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, true, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, CompareLessThanOrEqual, -1, 2, true, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, CompareTest, -1, 2, true, {INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_invalid, INS_invalid, INS_cmtst, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) @@ -531,12 +531,12 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar, HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, true, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, true, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 893c9d011cf4b..6f735993a9bf2 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -100,9 +100,9 @@ HARDWARE_INTRINSIC(Vector128, op_Addition, HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -206,9 +206,9 @@ HARDWARE_INTRINSIC(Vector256, op_Addition, HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) @@ -309,9 +309,9 @@ HARDWARE_INTRINSIC(Vector512, op_Addition, HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -518,7 +518,7 @@ HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, false, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(SSE2, LoadAlignedVector128, 16, 1, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, LoadFence, 0, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) HARDWARE_INTRINSIC(SSE2, LoadHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) @@ -626,7 +626,7 @@ HARDWARE_INTRINSIC(SSE41, DotProduct, HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, true, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, Floor, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, FloorScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, Insert, 16, 3, true, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE41, Insert, 16, 3, true, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(SSE41, LoadAlignedVector128NonTemporal, 16, 1, false, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, Max, 16, 2, true, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) HARDWARE_INTRINSIC(SSE41, Min, 16, 2, true, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) @@ -655,7 +655,7 @@ HARDWARE_INTRINSIC(SSE41, TestZ, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE41 64-bit-only Intrinsics HARDWARE_INTRINSIC(SSE41_X64, Extract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41_X64, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE41_X64, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1316,12 +1316,12 @@ HARDWARE_INTRINSIC(AVX512F, AddMask, HARDWARE_INTRINSIC(AVX512F, AndMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, AndNotMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512F, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, CompareLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512F, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index ff9cd371570fa..f4db6646598b8 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -107,7 +107,7 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node) if (comp->IsBaselineVector512IsaSupportedOpportunistically() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - if (!node->Data()->OperIs(GT_CNS_VEC)) + if (!node->Data()->IsCnsVec()) { return; } @@ -116,7 +116,8 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node) { return; } - if (node->Data()->AsVecCon()->IsAllBitsSet() || node->Data()->AsVecCon()->IsZero()) + + if (node->Data()->IsVectorAllBitsSet() || node->Data()->IsVectorZero()) { // To avoid some unexpected regression, this optimization only applies to non-all 1/0 constant vectors. return; @@ -8739,7 +8740,7 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, // return true if compress success. void Lowering::TryCompressConstVecData(GenTreeStoreInd* node) { - assert(node->Data()->OperIs(GT_CNS_VEC)); + assert(node->Data()->IsCnsVec()); GenTreeVecCon* vecCon = node->Data()->AsVecCon(); GenTreeHWIntrinsic* broadcast = nullptr; @@ -9007,7 +9008,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } - assert(op1->OperIs(GT_CNS_VEC)); + assert(op1->IsCnsVec()); break; } @@ -9123,7 +9124,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (IsContainableHWIntrinsicOp(node, op2, &supportsOp2RegOptional)) { - if (op2->OperIs(GT_CNS_VEC) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F) && + if (op2->IsCnsVec() && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F) && node->OperIsEmbBroadcastCompatible()) { TryFoldCnsVecForEmbeddedBroadcast(node, op2->AsVecCon()); @@ -9137,7 +9138,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) (intrinsicId == NI_BMI2_X64_MultiplyNoFlags)) && IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional)) { - if (op1->OperIs(GT_CNS_VEC) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F) && + if (op1->IsCnsVec() && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F) && node->OperIsEmbBroadcastCompatible()) { TryFoldCnsVecForEmbeddedBroadcast(node, op1->AsVecCon()); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index ebfdd52da3b55..744e13e099eaf 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -7405,6 +7405,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_And: case NI_AVX_And: case NI_AVX2_And: + case NI_AVX512F_And: + case NI_AVX512DQ_And: #endif { return EvaluateBinarySimd(this, GT_AND, /* scalar */ false, type, baseType, arg0VN, arg1VN); @@ -7420,6 +7422,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: + case NI_AVX512F_AndNot: + case NI_AVX512DQ_AndNot: { // xarch does: ~arg0VN & arg1VN return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg1VN, arg0VN); @@ -7472,6 +7476,18 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, } #endif +#ifdef TARGET_XARCH + case NI_AVX512F_Multiply: + { + if (!varTypeIsFloating(baseType)) + { + // We don't support this for integrals since it returns a different size than the input + break; + } + FALLTHROUGH; + } +#endif // TARGET_XARCH + #ifdef TARGET_ARM64 case NI_AdvSimd_Multiply: case NI_AdvSimd_Arm64_Multiply: @@ -7482,6 +7498,10 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE41_MultiplyLow: case NI_AVX_Multiply: case NI_AVX2_MultiplyLow: + case NI_AVX512F_MultiplyLow: + case NI_AVX512BW_MultiplyLow: + case NI_AVX512DQ_MultiplyLow: + case NI_AVX512DQ_VL_MultiplyLow: #endif { return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN); @@ -7504,6 +7524,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Or: case NI_AVX_Or: case NI_AVX2_Or: + case NI_AVX512F_Or: + case NI_AVX512DQ_Or: #endif { return EvaluateBinarySimd(this, GT_OR, /* scalar */ false, type, baseType, arg0VN, arg1VN); @@ -7669,6 +7691,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: + case NI_AVX512F_Xor: + case NI_AVX512DQ_Xor: #endif { return EvaluateBinarySimd(this, GT_XOR, /* scalar */ false, type, baseType, arg0VN, arg1VN); @@ -7717,6 +7741,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_And: case NI_AVX_And: case NI_AVX2_And: + case NI_AVX512F_And: + case NI_AVX512DQ_And: #endif { // Handle `x & 0 == 0` and `0 & x == 0` @@ -7744,6 +7770,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: + case NI_AVX512F_AndNot: + case NI_AVX512DQ_AndNot: { #ifdef TARGET_ARM64 if (cnsVN == arg0VN) @@ -7837,6 +7865,18 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, } #endif +#ifdef TARGET_XARCH + case NI_AVX512F_Multiply: + { + if (!varTypeIsFloating(baseType)) + { + // We don't support this for integrals since it returns a different size than the input + break; + } + FALLTHROUGH; + } +#endif // TARGET_XARCH + #ifdef TARGET_ARM64 case NI_AdvSimd_Multiply: case NI_AdvSimd_Arm64_Multiply: @@ -7847,7 +7887,6 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE41_MultiplyLow: case NI_AVX_Multiply: case NI_AVX2_MultiplyLow: - case NI_AVX512F_Multiply: case NI_AVX512F_MultiplyLow: case NI_AVX512BW_MultiplyLow: case NI_AVX512DQ_MultiplyLow: @@ -7893,6 +7932,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Or: case NI_AVX_Or: case NI_AVX2_Or: + case NI_AVX512F_Or: + case NI_AVX512DQ_Or: #endif { // Handle `x | 0 == x` and `0 | x == x` @@ -7980,6 +8021,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: + case NI_AVX512F_Xor: + case NI_AVX512DQ_Xor: #endif { // Handle `x | 0 == x` and `0 | x == x` @@ -8007,6 +8050,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_And: case NI_AVX_And: case NI_AVX2_And: + case NI_AVX512F_And: + case NI_AVX512DQ_And: #endif { // Handle `x & x == x` @@ -8020,6 +8065,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: + case NI_AVX512F_AndNot: + case NI_AVX512DQ_AndNot: { // Handle `x & ~x == 0` return VNZeroForType(type); @@ -8033,6 +8080,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Or: case NI_AVX_Or: case NI_AVX2_Or: + case NI_AVX512F_Or: + case NI_AVX512DQ_Or: #endif { // Handle `x | x == x` @@ -8068,6 +8117,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_SSE2_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: + case NI_AVX512F_Xor: + case NI_AVX512DQ_Xor: #endif { // Handle `x ^ x == 0` @@ -8144,7 +8195,6 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, { if (IsVNConstant(arg0VN) && IsVNConstant(arg1VN) && IsVNConstant(arg2VN)) { - switch (ni) { case NI_Vector128_WithElement: @@ -8170,6 +8220,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, return EvaluateSimdFloatWithElement(this, type, arg0VN, index, value); } + default: { break; From 1934fc4cf70f9a8c323d38c1fffb08e17a482440 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 28 Jan 2024 11:54:05 -0800 Subject: [PATCH 2/5] Don't use gtFindLink unnecessarily --- src/coreclr/jit/assertionprop.cpp | 42 +++++++++++++++---------------- src/coreclr/jit/compiler.h | 6 ++--- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index c0e1a8e15757c..b86818787e804 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -2724,10 +2724,10 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab // managing side-effects. // // Arguments: -// block - The block containing the tree. -// stmt - The statement in the block containing the tree. -// tree - The tree node whose value is known at compile time. -// The tree should have a constant value number. +// block - The block containing the tree. +// parent - The parent node of the tree. +// tree - The tree node whose value is known at compile time. +// The tree should have a constant value number. // // Return Value: // Returns a potentially new or a transformed tree node. @@ -2747,7 +2747,7 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab // the relop will evaluate to "true" or "false" statically, then the side-effects // will be put into new statements, presuming the JTrue will be folded away. // -GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, GenTree* tree) +GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* parent, GenTree* tree) { if (tree->OperGet() == GT_JTRUE) { @@ -2995,7 +2995,7 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, G if (conValTree != nullptr) { - if (!optIsProfitableToSubstitute(tree, block, stmt, conValTree)) + if (!optIsProfitableToSubstitute(tree, block, parent, conValTree)) { // Not profitable to substitute return nullptr; @@ -3027,15 +3027,15 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, G // optIsProfitableToSubstitute: Checks if value worth substituting to dest // // Arguments: -// dest - destination to substitute value to -// destBlock - Basic block of destination -// destStmt - Statement of destination -// value - value we plan to substitute +// dest - destination to substitute value to +// destBlock - Basic block of destination +// destParent - Parent of destination +// value - value we plan to substitute // // Returns: // False if it's likely not profitable to do substitution, True otherwise // -bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, Statement* destStmt, GenTree* value) +bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* destParent, GenTree* value) { // Giving up on these kinds of handles demonstrated size improvements if (value->IsIconHandle(GTF_ICON_STATIC_HDL, GTF_ICON_CLASS_HDL)) @@ -3060,12 +3060,9 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, #if defined(FEATURE_HW_INTRINSICS) GenTreeVecCon* vecCon = value->AsVecCon(); - FindLinkData linkData = gtFindLink(destStmt, dest); - noway_assert(linkData.result != nullptr); - - if ((linkData.parent != nullptr) && linkData.parent->OperIsHWIntrinsic()) + if ((destParent != nullptr) && destParent->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* parent = linkData.parent->AsHWIntrinsic(); + GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic(); NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId(); var_types simdBaseType = parent->GetSimdBaseType(); @@ -6100,9 +6097,10 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test) // This function is called as part of a post-order tree walk. // // Arguments: -// tree - The currently visited tree node. -// stmt - The statement node in which the "tree" is present. -// block - The block that contains the statement that contains the tree. +// tree - The currently visited tree node. +// stmt - The statement node in which the "tree" is present. +// parent - The parent node of the tree. +// block - The block that contains the statement that contains the tree. // // Return Value: // Returns the standard visitor walk result. @@ -6112,7 +6110,7 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test) // evaluates to constant, then the tree is replaced by its side effects and // the constant node. // -Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree) +Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree) { // Don't perform const prop on expressions marked with GTF_DONT_CSE // TODO-ASG: delete. @@ -6203,7 +6201,7 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta } // Perform the constant propagation - GenTree* newTree = optVNConstantPropOnTree(block, stmt, tree); + GenTree* newTree = optVNConstantPropOnTree(block, parent, tree); if (newTree == nullptr) { @@ -6284,7 +6282,7 @@ Compiler::fgWalkResult Compiler::optVNAssertionPropCurStmtVisitor(GenTree** ppTr pThis->optVnNonNullPropCurStmt(pData->block, pData->stmt, *ppTree); - return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, *ppTree); + return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, data->parent, *ppTree); } /***************************************************************************** diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 562854e9f1e3f..be658150f63e9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7695,9 +7695,9 @@ class Compiler public: void optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree); - fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree); + fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree); GenTree* optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test); - GenTree* optVNConstantPropOnTree(BasicBlock* block, Statement* stmt, GenTree* tree); + GenTree* optVNConstantPropOnTree(BasicBlock* block, GenTree* parent, GenTree* tree); GenTree* optExtractSideEffListFromConst(GenTree* tree); AssertionIndex GetAssertionCount() @@ -7783,7 +7783,7 @@ class Compiler GenTree* optConstantAssertionProp(AssertionDsc* curAssertion, GenTreeLclVarCommon* tree, Statement* stmt DEBUGARG(AssertionIndex index)); - bool optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, Statement* destStmt, GenTree* value); + bool optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* destParent, GenTree* value); bool optZeroObjAssertionProp(GenTree* tree, ASSERT_VALARG_TP assertions); // Assertion propagation functions. From 6327cafbc582da1c47f4b0b9e3e447878e4972e1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 28 Jan 2024 12:11:57 -0800 Subject: [PATCH 3/5] Apply formatting patch --- src/coreclr/jit/assertionprop.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index b86818787e804..e99a05a190f5e 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -6110,7 +6110,10 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test) // evaluates to constant, then the tree is replaced by its side effects and // the constant node. // -Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree) +Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, + Statement* stmt, + GenTree* parent, + GenTree* tree) { // Don't perform const prop on expressions marked with GTF_DONT_CSE // TODO-ASG: delete. From dbbdb4271fbfa098ceb4b4c039e990c0cb8ce8b3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 28 Jan 2024 15:02:06 -0800 Subject: [PATCH 4/5] Still allow constant propagation for single use locals --- src/coreclr/jit/assertionprop.cpp | 36 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index e99a05a190f5e..fe443142b1527 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -3058,19 +3058,45 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, if (value->IsCnsVec()) { #if defined(FEATURE_HW_INTRINSICS) - GenTreeVecCon* vecCon = value->AsVecCon(); + // Many hwintrinsics can't benefit from constant prop because they don't support + // constant folding nor do they support any specialized encodings. So, we want to + // skip constant prop and preserve any user-defined locals in that scenario. + // + // However, if the local is only referenced once then we want to allow propagation + // regardless since we can then contain the only actual usage and save a needless + // instruction. + // + // To determine number of uses, we prefer checking SSA first since it is more exact + // and can account for patterns where a local is reassigned later. However, if we + // can't find an SSA then we fallback to the naive ref count of the local, noting + // that we need to check for greater than 2 since it includes both the def and use. + + bool inspectIntrinsic = false; if ((destParent != nullptr) && destParent->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic(); + LclVarDsc* varDsc = lvaGetDesc(lcl); + + if (lcl->HasSsaName()) + { + inspectIntrinsic = varDsc->GetPerSsaData(lcl->GetSsaNum())->GetNumUses() > 1; + } + else + { + inspectIntrinsic = varDsc->lvRefCnt() > 2; + } + } + + if (inspectIntrinsic) + { + GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic(); + GenTreeVecCon* vecCon = value->AsVecCon(); + NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId(); var_types simdBaseType = parent->GetSimdBaseType(); if (!HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId)) { - // Many hwintrinsics can't benefit from constant prop because they don't support - // constant folding nor do they support any specialized encodings. So, we want to - // skip constant prop and preserve any user-defined locals in that scenario. return false; } From 3fc1000b8e9268431cb12426c7a92c0912772bf6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 28 Jan 2024 16:01:11 -0800 Subject: [PATCH 5/5] Apply formatting patch --- src/coreclr/jit/assertionprop.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index fe443142b1527..9dcdc79fd25af 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -3092,8 +3092,8 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic(); GenTreeVecCon* vecCon = value->AsVecCon(); - NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId(); - var_types simdBaseType = parent->GetSimdBaseType(); + NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId(); + var_types simdBaseType = parent->GetSimdBaseType(); if (!HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId)) {