Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6696,15 +6696,6 @@ class Compiler
GenTree* fgMorphHWIntrinsicOptional(GenTreeHWIntrinsic* tree);
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node);
#endif // FEATURE_MASKED_HW_INTRINSICS
#ifdef TARGET_ARM64
bool canMorphVectorOperandToMask(GenTree* node);
bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node);
GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent);
GenTreeHWIntrinsic* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node);
#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);
Expand Down
215 changes: 208 additions & 7 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32112,10 +32112,8 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const
#if defined(FEATURE_HW_INTRINSICS)
GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
{
if (!opts.Tier0OptimizationEnabled())
{
return tree;
}
assert(!optValnumCSE_phase);
assert(opts.Tier0OptimizationEnabled());

NamedIntrinsic ni = tree->GetHWIntrinsicId();
var_types retType = tree->TypeGet();
Expand Down Expand Up @@ -32254,6 +32252,126 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
// We shouldn't find AND_NOT nodes since it should only be produced in lowering
assert(oper != GT_AND_NOT);

#if defined(FEATURE_MASKED_HW_INTRINSICS) && defined(TARGET_XARCH)
if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper))
{
// Comparisons that produce masks lead to more verbose trees than
// necessary in many scenarios due to requiring a CvtMaskToVector
// node to be inserted over them and this can block various opts
// that are dependent on tree height and similar. So we want to
// fold the unnecessary back and forth conversions away where possible.

genTreeOps effectiveOper = oper;
GenTree* actualOp2 = op2;

if (oper == GT_NOT)
{
assert(op2 == nullptr);
op2 = op1;
}

// We need both operands to be ConvertMaskToVector in
// order to optimize this to a direct mask operation

if (op1->OperIsConvertMaskToVector())
{
if (!op2->OperIsHWIntrinsic())
{
if ((oper == GT_XOR) && op2->IsVectorAllBitsSet())
{
// We want to explicitly recognize op1 ^ AllBitsSet as
// some platforms don't have direct support for ~op1

effectiveOper = GT_NOT;
op2 = op1;
}
}

if (op2->OperIsConvertMaskToVector())
{
GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic();
GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic();

unsigned simdBaseTypeSize = genTypeSize(simdBaseType);

if ((genTypeSize(cvtOp1->GetSimdBaseType()) == simdBaseTypeSize) &&
(genTypeSize(cvtOp2->GetSimdBaseType()) == simdBaseTypeSize))
{
// We need both operands to be the same kind of mask; otherwise
// the bitwise operation can differ in how it performs

NamedIntrinsic maskIntrinsicId = NI_Illegal;

switch (effectiveOper)
{
case GT_AND:
{
maskIntrinsicId = NI_AVX512_AndMask;
break;
}

case GT_NOT:
{
maskIntrinsicId = NI_AVX512_NotMask;
break;
}

case GT_OR:
{
maskIntrinsicId = NI_AVX512_OrMask;
break;
}

case GT_XOR:
{
maskIntrinsicId = NI_AVX512_XorMask;
break;
}

default:
{
unreached();
}
}

assert(maskIntrinsicId != NI_Illegal);

if (effectiveOper == oper)
{
tree->ChangeHWIntrinsicId(maskIntrinsicId);
tree->Op(1) = cvtOp1->Op(1);
}
else
{
assert(effectiveOper == GT_NOT);
tree->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1));
tree->gtFlags &= ~GTF_REVERSE_OPS;
}

tree->gtType = TYP_MASK;
DEBUG_DESTROY_NODE(op1);

if (effectiveOper != GT_NOT)
{
tree->Op(2) = cvtOp2->Op(1);
}

if (actualOp2 != nullptr)
{
DEBUG_DESTROY_NODE(actualOp2);
}
tree->SetMorphed(this);

tree = gtNewSimdCvtMaskToVectorNode(retType, tree, simdBaseJitType, simdSize)->AsHWIntrinsic();
tree->SetMorphed(this);

return tree;
}
}
}
}
#endif // FEATURE_MASKED_HW_INTRINSICS && TARGET_XARCH

switch (ni)
{
// There's certain IR simplifications that are possible and which
Expand Down Expand Up @@ -32830,10 +32948,28 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
oper = GT_NONE;
}

// For mask nodes in particular, the foldings below are done under the presumption
// that we only produce something like `AddMask(op1, op2)` if op1 and op2 are compatible
// masks. On xarch, for example, this means that it'd be adding 8, 16, 32, or 64-bits
// together with the same size. We wouldn't ever encounter something like an 8 and 16 bit
// masks being added. This ensures that we don't end up with a case where folding would
// cause a different result to be produced, such as because the remaining upper bits are
// no longer zeroed.

switch (oper)
{
case GT_ADD:
{
if (varTypeIsMask(retType))
{
// Handle `x + 0 == x` and `0 + x == x`
if (cnsNode->IsMaskZero())
{
resultNode = otherNode;
}
break;
}

if (varTypeIsFloating(simdBaseType))
{
// Handle `x + NaN == NaN` and `NaN + x == NaN`
Expand Down Expand Up @@ -32867,6 +33003,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

case GT_AND:
{
if (varTypeIsMask(retType))
{
// Handle `x & 0 == 0` and `0 & x == 0`
if (cnsNode->IsMaskZero())
{
resultNode = otherNode;
break;
}

// Handle `x & AllBitsSet == x` and `AllBitsSet & x == x`
if (cnsNode->IsMaskAllBitsSet())
{
resultNode = otherNode;
}
break;
}

// Handle `x & 0 == 0` and `0 & x == 0`
if (cnsNode->IsVectorZero())
{
Expand Down Expand Up @@ -33100,6 +33253,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

case GT_OR:
{
if (varTypeIsMask(retType))
{
// Handle `x | 0 == x` and `0 | x == x`
if (cnsNode->IsMaskZero())
{
resultNode = otherNode;
break;
}

// Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet`
if (cnsNode->IsMaskAllBitsSet())
{
resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
}
break;
}

// Handle `x | 0 == x` and `0 | x == x`
if (cnsNode->IsVectorZero())
{
Expand Down Expand Up @@ -33127,6 +33297,27 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
// Handle `x >> 0 == x` and `0 >> x == 0`
// Handle `x >>> 0 == x` and `0 >>> x == 0`

if (varTypeIsMask(retType))
{
if (cnsNode->IsMaskZero())
{
if (cnsNode == op2)
{
resultNode = otherNode;
}
else
{
resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
}
}
else if (cnsNode->IsIntegralConst(0))
{
assert(cnsNode == op2);
resultNode = otherNode;
}
break;
}

if (cnsNode->IsVectorZero())
{
if (cnsNode == op2)
Expand Down Expand Up @@ -33172,7 +33363,17 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

case GT_XOR:
{
// Handle `x | 0 == x` and `0 | x == x`
if (varTypeIsMask(retType))
{
// Handle `x ^ 0 == x` and `0 ^ x == x`
if (cnsNode->IsMaskZero())
{
resultNode = otherNode;
}
break;
}

// Handle `x ^ 0 == x` and `0 ^ x == x`
if (cnsNode->IsVectorZero())
{
resultNode = otherNode;
Expand Down Expand Up @@ -33341,7 +33542,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
else
{
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsFalseMask());
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsMaskZero());
}
#endif

Expand All @@ -33359,7 +33560,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
return op2;
}

if (op1->IsVectorZero() || op1->IsFalseMask())
if (op1->IsVectorZero() || op1->IsMaskZero())
{
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
}
Expand Down
56 changes: 38 additions & 18 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1803,8 +1803,9 @@ struct GenTree
inline bool IsVectorCreate() const;
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorBroadcast(var_types simdBaseType) const;
inline bool IsMaskZero() const;
inline bool IsMaskAllBitsSet() const;
inline bool IsTrueMask(var_types simdBaseType) const;
inline bool IsFalseMask() const;

inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);

Expand Down Expand Up @@ -9629,6 +9630,42 @@ inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const
return false;
}

//-------------------------------------------------------------------
// IsMaskZero: returns true if this node is a mask constant with all bits zero.
//
// Returns:
//    True if this node is a mask constant with all bits zero
//
inline bool GenTree::IsMaskZero() const
{
#if defined(FEATURE_MASKED_HW_INTRINSICS)
    // A zero mask is a constant mask node whose lane bits are all clear.
    return IsCnsMsk() && AsMskCon()->IsZero();
#else
    // Targets without masked HW intrinsics never materialize mask constants.
    return false;
#endif // FEATURE_MASKED_HW_INTRINSICS
}

//-------------------------------------------------------------------
// IsMaskAllBitsSet: returns true if this node is a mask constant with all bits set.
//
// Returns:
//    True if this node is a mask constant with all bits set
//
inline bool GenTree::IsMaskAllBitsSet() const
{
#if defined(FEATURE_MASKED_HW_INTRINSICS)
    // An all-bits-set mask is a constant mask node with every lane bit set.
    return IsCnsMsk() && AsMskCon()->IsAllBitsSet();
#else
    // Targets without masked HW intrinsics never materialize mask constants.
    return false;
#endif // FEATURE_MASKED_HW_INTRINSICS
}

//------------------------------------------------------------------------
// IsTrueMask: Is the given node a true mask
//
Expand All @@ -9655,23 +9692,6 @@ inline bool GenTree::IsTrueMask(var_types simdBaseType) const
return false;
}

//------------------------------------------------------------------------
// IsFalseMask: Is the given node a false mask
//
// Returns true if the node is a false mask, ie all zeros
//
inline bool GenTree::IsFalseMask() const
{
#ifdef TARGET_ARM64
    // A false mask is a constant mask node whose lane bits are all zero.
    return IsCnsMsk() && AsMskCon()->IsZero();
#else
    // Only ARM64 produces mask constants for this query.
    return false;
#endif
}

//-------------------------------------------------------------------
// GetIntegralVectorConstElement: Gets the value of a given element in an integral vector constant
//
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3942,7 +3942,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
GenTree* op3 = intrin.op3;

// Handle op1
if (op1->IsFalseMask())
if (op1->IsMaskZero())
{
// When we are merging with zero, we can specialize
// and avoid instantiating the vector constant.
Expand Down
Loading
Loading