Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Centralize the folding logic for ConditionalSelect and ensure side effects aren't dropped #104175

Merged
merged 5 commits into from
Jun 30, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 85 additions & 44 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23435,18 +23435,14 @@ GenTree* Compiler::gtNewSimdCndSelNode(
NamedIntrinsic intrinsic = NI_Illegal;

#if defined(TARGET_XARCH)
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));

if (canUseEvexEncoding())
if (simdSize == 64)
{
GenTree* control = gtNewIconNode(0xCA); // (B & A) | (C & ~A)
return gtNewSimdTernaryLogicNode(type, op1, op2, op3, control, simdBaseJitType, simdSize);
assert(canUseEvexEncodingDebugOnly());
intrinsic = NI_Vector512_ConditionalSelect;
}

assert(simdSize != 64);

if (simdSize == 32)
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_Vector256_ConditionalSelect;
}
else
Expand Down Expand Up @@ -26562,45 +26558,19 @@ GenTree* Compiler::gtNewSimdUnOpNode(
{
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));

if (genTypeSize(simdBaseType) >= 4)
{
intrinsic = NI_AVX512F_TernaryLogic;
}
}
else
{
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
}
if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
intrinsic = NI_AVX10v1_TernaryLogic;
}
else if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
{
intrinsic = NI_AVX512F_VL_TernaryLogic;
}
assert(canUseEvexEncodingDebugOnly());
intrinsic = NI_Vector512_op_OnesComplement;
}

if (intrinsic != NI_Illegal)
else if (simdSize == 32)
{
// AVX512 allows performing `not` without requiring a memory access
assert(canUseEvexEncodingDebugOnly());

op2 = gtNewZeroConNode(type);
op3 = gtNewZeroConNode(type);

GenTree* cns = gtNewIconNode(static_cast<uint8_t>(~0xAA)); // ~C
return gtNewSimdTernaryLogicNode(type, op3, op2, op1, cns, simdBaseJitType, simdSize);
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_Vector256_op_OnesComplement;
}
else
{
op2 = gtNewAllBitsSetConNode(type);
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize);
intrinsic = NI_Vector128_op_OnesComplement;
}
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
}
#elif defined(TARGET_ARM64)
case GT_NEG:
Expand Down Expand Up @@ -28150,12 +28120,16 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const
return GT_AND;
}

#if defined(TARGET_ARM64)
#if defined(TARGET_XARCH)
case NI_Vector128_op_OnesComplement:
case NI_Vector256_op_OnesComplement:
case NI_Vector512_op_OnesComplement:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Not:
#endif
{
return GT_NOT;
}
#endif

#if defined(TARGET_XARCH)
case NI_SSE_Xor:
Expand Down Expand Up @@ -30512,6 +30486,73 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
}

if (op3 != nullptr)
{
switch (ni)
{
case NI_Vector128_ConditionalSelect:
#if defined(TARGET_XARCH)
case NI_Vector256_ConditionalSelect:
case NI_Vector512_ConditionalSelect:
#elif defined(TARGET_ARM64)
case NI_Vector64_ConditionalSelect:
case NI_Sve_ConditionalSelect:
#endif
{
if (cnsNode != op1)
{
break;
}

if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
if ((op3->gtFlags & GTF_SIDE_EFFECT) == 0)
{
// op3 has no side effects, so we can return op2 directly
return op2;
}

if ((op2->gtFlags & GTF_SIDE_EFFECT) == 0)
{
// op2 has no side effects, but op3 does, we need to wrap
// so that the side effects for op3 are preserved
return gtWrapWithSideEffects(op2, op3, GTF_ALL_EFFECT);
}
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

// op2 and op3 both have side effects, we would have to evaluate op2
// into a local and then wrap the op3 with side effects returning that
// local, which isn't safe to do from the general purpose handler here
break;
}

if (op1->IsVectorZero())
{
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
}

if (op2->IsCnsVec() && op3->IsCnsVec())
{
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());

// op3 = op2 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());

// op2 = op2 | op3
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());

resultNode = op2;
}
break;
}

default:
{
break;
}
}
}

if (resultNode != tree)
{
if (fgGlobalMorph)
Expand Down
13 changes: 0 additions & 13 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1903,19 +1903,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_Sve_ConditionalSelect:
{
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
{
return retNode->AsHWIntrinsic()->Op(2);
}
else if (op1->IsVectorZero())
{
return retNode->AsHWIntrinsic()->Op(3);
}
break;
}

case NI_Sve_GetActiveElementCount:
case NI_Sve_TestAnyTrue:
case NI_Sve_TestFirstTrue:
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down Expand Up @@ -221,7 +221,7 @@ HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down Expand Up @@ -321,7 +321,7 @@ HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down
Loading
Loading