Merged
29 commits
7835f72
Arm64 SVE: Fix conditionalselect with constant arguments
a74nh Jun 18, 2025
18340c8
Make masked EvaluateBinaryInPlace() Arm64 only
a74nh Jun 23, 2025
0d60a5e
Check significantBit in EvaluateSimdVectorToPattern()
a74nh Jun 23, 2025
0a36025
fix set checks in EvaluateSimdVectorToPattern
a74nh Jun 23, 2025
59107d0
Use masks in EvalHWIntrinsicFunTernary() for SVE conditionalselect
a74nh Jun 24, 2025
b923b28
Check all of a vector lane when converting to mask
a74nh Jun 24, 2025
c01bc22
Add testing for EvalHWIntrinsicFunTernary changes
a74nh Jun 24, 2025
f9c6dd6
whitespace
a74nh Jun 24, 2025
802ae0d
Revert "Check all of a vector lane when converting to mask"
a74nh Jun 24, 2025
3c3cb8f
rename significantBit to leastSignificantBit
a74nh Jun 24, 2025
c96e38c
Use LSB of vector when converting from vector to mask
a74nh Jun 25, 2025
9d2cebd
Add LowerCnsMask
a74nh Jun 27, 2025
70d601d
Add testcase
a74nh Jun 27, 2025
5428e1d
Remove EvaluateSimdMaskToPattern
a74nh Jun 27, 2025
a2d7aea
Revert "Use LSB of vector when converting from vector to mask"
a74nh Jun 27, 2025
c65fd38
formatting
a74nh Jun 27, 2025
f513c84
fix assert check
a74nh Jun 27, 2025
3bf4d1e
GenTree for gtNewSimdCvtVectorToMaskNode()
a74nh Jun 30, 2025
cd27a7c
Split NI_Sve_ConditionalSelect into it's own case
a74nh Jun 30, 2025
7856b87
Remove mask version of EvaluateBinaryInPlace
a74nh Jun 30, 2025
84d0408
remove assert
a74nh Jun 30, 2025
ed633f3
Check all bits in EvaluateSimdCvtVectorToMask
a74nh Jul 1, 2025
a53e4d1
Add ConstantVectors test
a74nh Jul 1, 2025
860ff75
merge main
a74nh Jul 2, 2025
cd52ec1
No need for DOTNET_EnableHWIntrinsic in csproj
a74nh Jul 2, 2025
748d297
Use IsMaskZero
a74nh Jul 2, 2025
090523a
Remove EvaluateBinarySimdAndMask
a74nh Jul 2, 2025
e7034bd
In lowering, default the mask type to byte
a74nh Jul 2, 2025
f14fc8e
In lowering, convert mask using byte basetype
a74nh Jul 2, 2025
70 changes: 59 additions & 11 deletions src/coreclr/jit/gentree.cpp
@@ -33525,28 +33525,63 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
case NI_Vector512_ConditionalSelect:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_BitwiseSelect:
case NI_Sve_ConditionalSelect:
#endif
{
assert(!varTypeIsMask(retType));
assert(!varTypeIsMask(op1));

if (cnsNode != op1)
{
break;
}

#if defined(TARGET_ARM64)
if (ni == NI_Sve_ConditionalSelect)
if (op1->IsVectorAllBitsSet())
{
assert(!op1->IsVectorAllBitsSet() && !op1->IsVectorZero());
if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0)
{
// op3 has side effects, this would require us to append a new statement
// to ensure that it isn't lost, which isn't safe to do from the general
// purpose handler here. We'll recognize this and mark it in VN instead
break;
}

// op3 has no side effects, so we can return op2 directly
return op2;
}
else

if (op1->IsVectorZero())
{
assert(!op1->IsTrueMask(simdBaseType) && !op1->IsMaskZero());
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
}

if (op2->IsCnsVec() && op3->IsCnsVec())
{
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());

// op3 = op2 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());

// op2 = op2 | op3
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());

resultNode = op2;
}
break;
}

#if defined(TARGET_ARM64)
case NI_Sve_ConditionalSelect:
{
assert(!varTypeIsMask(retType));
assert(varTypeIsMask(op1));

if (cnsNode != op1)
{
break;
}
#endif

if (op1->IsVectorAllBitsSet() || op1->IsTrueMask(simdBaseType))
if (op1->IsTrueMask(simdBaseType))
{
if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0)
{
@@ -33560,18 +33595,30 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
return op2;
}

if (op1->IsVectorZero() || op1->IsMaskZero())
if (op1->IsMaskZero())
{
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
}

if (op2->IsCnsVec() && op3->IsCnsVec())
{
assert(op2->gtType == TYP_SIMD16);
assert(op3->gtType == TYP_SIMD16);

simd16_t op1SimdVal;
EvaluateSimdCvtMaskToVector<simd16_t>(simdBaseType, &op1SimdVal, op1->AsMskCon()->gtSimdMaskVal);

// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());
simd16_t result = {};
EvaluateBinarySimd<simd16_t>(GT_AND, false, simdBaseType, &result, op2->AsVecCon()->gtSimd16Val,
op1SimdVal);
op2->AsVecCon()->gtSimd16Val = result;

// op3 = op2 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());
result = {};
EvaluateBinarySimd<simd16_t>(GT_AND_NOT, false, simdBaseType, &result, op3->AsVecCon()->gtSimd16Val,
op1SimdVal);
op3->AsVecCon()->gtSimd16Val = result;

// op2 = op2 | op3
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());
@@ -33580,6 +33627,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
break;
}
#endif // TARGET_ARM64

default:
{
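For reference, a minimal standalone sketch of the lane-wise fold that the constant-vector path above performs via GT_AND / GT_AND_NOT / GT_OR; FoldBitwiseSelect is a hypothetical helper used only for illustration, not JIT code:

#include <cstdint>

// result = (op2 & op1) | (op3 & ~op1), byte by byte over a 16-byte constant.
static void FoldBitwiseSelect(const uint8_t op1[16], const uint8_t op2[16],
                              const uint8_t op3[16], uint8_t result[16])
{
    for (int i = 0; i < 16; i++)
    {
        result[i] = static_cast<uint8_t>((op2[i] & op1[i]) | (op3[i] & ~op1[i]));
    }
}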
5 changes: 5 additions & 0 deletions src/coreclr/jit/lower.cpp
@@ -789,6 +789,11 @@ GenTree* Lowering::LowerNode(GenTree* node)
LowerReturnSuspend(node);
break;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_ARM64)
case GT_CNS_MSK:
return LowerCnsMask(node->AsMskCon());
#endif // FEATURE_HW_INTRINSICS && TARGET_ARM64

default:
break;
}
11 changes: 6 additions & 5 deletions src/coreclr/jit/lower.h
@@ -451,11 +451,12 @@ class Lowering final : public Phase
GenTree* TryLowerXorOpToGetMaskUpToLowestSetBit(GenTreeOp* xorNode);
void LowerBswapOp(GenTreeOp* node);
#elif defined(TARGET_ARM64)
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
void LowerModPow2(GenTree* node);
bool TryLowerAddForPossibleContainment(GenTreeOp* node, GenTree** next);
void StoreFFRValue(GenTreeHWIntrinsic* node);
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
void LowerModPow2(GenTree* node);
GenTree* LowerCnsMask(GenTreeMskCon* mask);
bool TryLowerAddForPossibleContainment(GenTreeOp* node, GenTree** next);
void StoreFFRValue(GenTreeHWIntrinsic* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
GenTree* InsertNewSimdCreateScalarUnsafeNode(var_types type,
GenTree* op1,
71 changes: 71 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
@@ -1134,6 +1134,77 @@ void Lowering::LowerModPow2(GenTree* node)
ContainCheckNode(mod);
}

//------------------------------------------------------------------------
// LowerCnsMask: Lower GT_CNS_MSK. Ensure the mask matches a known pattern.
// If not then lower to a constant vector.
//
// Arguments:
// mask - the node to lower
//
GenTree* Lowering::LowerCnsMask(GenTreeMskCon* mask)
{
// Try every type until a match is found

if (mask->IsZero())
{
return mask->gtNext;
}

if (EvaluateSimdMaskToPattern<simd16_t>(TYP_BYTE, mask->gtSimdMaskVal) != SveMaskPatternNone)
{
return mask->gtNext;
}

if (EvaluateSimdMaskToPattern<simd16_t>(TYP_SHORT, mask->gtSimdMaskVal) != SveMaskPatternNone)
{
return mask->gtNext;
}

if (EvaluateSimdMaskToPattern<simd16_t>(TYP_INT, mask->gtSimdMaskVal) != SveMaskPatternNone)
{
return mask->gtNext;
}

if (EvaluateSimdMaskToPattern<simd16_t>(TYP_LONG, mask->gtSimdMaskVal) != SveMaskPatternNone)
{
return mask->gtNext;
}

// Not a valid pattern, so cannot be created using ptrue/pfalse. Instead the mask will require
// loading from memory. There is no way to load to a predicate from memory using a PC relative
// address, so instead use a constant vector plus conversion to mask. Using basetype byte will
// ensure every entry in the mask is converted.

LABELEDDISPTREERANGE("lowering cns mask to cns vector (before)", BlockRange(), mask);

// Create a vector constant
GenTreeVecCon* vecCon = comp->gtNewVconNode(TYP_SIMD16);
EvaluateSimdCvtMaskToVector<simd16_t>(TYP_BYTE, &vecCon->gtSimdVal, mask->gtSimdMaskVal);
BlockRange().InsertBefore(mask, vecCon);

// Convert the vector constant to a mask
GenTree* convertedVec = comp->gtNewSimdCvtVectorToMaskNode(TYP_MASK, vecCon, CORINFO_TYPE_BYTE, 16);
BlockRange().InsertBefore(mask, convertedVec->AsHWIntrinsic()->Op(1));
BlockRange().InsertBefore(mask, convertedVec);

// Update use
LIR::Use use;
if (BlockRange().TryGetUse(mask, &use))
{
use.ReplaceWith(convertedVec);
}
else
{
convertedVec->SetUnusedValue();
}

BlockRange().Remove(mask);

LABELEDDISPTREERANGE("lowering cns mask to cns vector (after)", BlockRange(), vecCon);

return vecCon->gtNext;
}

const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16;

//------------------------------------------------------------------------
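As a rough sketch of the "known pattern" requirement (a simplification under stated assumptions, not the real EvaluateSimdMaskToPattern, which also maps the active-lane count to a specific SveMaskPattern): for a given element size, a predicate constant can only come from ptrue if its active lanes form a leading run.

#include <cstdint>

// Arm64 SVE predicate constants keep one bit per byte, so lane i of an elemSize-byte
// element contributes the bit at position i * elemSize. This checks the leading-run
// property only; the real code additionally requires the run length to be encodable.
static bool IsLeadingLaneRun(uint16_t maskBits, int elemSize) // elemSize in {1, 2, 4, 8}
{
    const int laneCount = 16 / elemSize;
    bool      seenClear = false;

    for (int lane = 0; lane < laneCount; lane++)
    {
        const bool active = ((maskBits >> (lane * elemSize)) & 1) != 0;
        if (!active)
        {
            seenClear = true;
        }
        else if (seenClear)
        {
            return false; // an active lane after an inactive one cannot come from ptrue
        }
    }
    return true;
}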
27 changes: 12 additions & 15 deletions src/coreclr/jit/simd.h
@@ -1598,35 +1598,32 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0)
uint32_t count = sizeof(TSimd) / sizeof(TBase);
uint64_t mask = 0;

TBase significantBit = 1;
#if defined(TARGET_XARCH)
significantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1);
TBase MostSignificantBit = static_cast<TBase>(1) << ((sizeof(TBase) * 8) - 1);
#endif

for (uint32_t i = 0; i < count; i++)
{
TBase input0;
memcpy(&input0, &arg0.u8[i * sizeof(TBase)], sizeof(TBase));

if ((input0 & significantBit) != 0)
{
#if defined(TARGET_XARCH)
// For xarch we have count sequential bits to write
// depending on if the corresponding the input element
// has its most significant bit set

// For xarch we have count sequential bits to write depending on if the
// corresponding the input element has its most significant bit set
if ((input0 & MostSignificantBit) != 0)
{
mask |= static_cast<uint64_t>(1) << i;
}
#elif defined(TARGET_ARM64)
// For Arm64 we have count total bits to write, but
// they are sizeof(TBase) bits apart. We set
// depending on if the corresponding input element
// has its least significant bit set

// For Arm64 we have count total bits to write, but they are sizeof(TBase) bits
// apart. We set depending on if the corresponding input element is non zero
if (input0 != 0)
{
mask |= static_cast<uint64_t>(1) << (i * sizeof(TBase));
}
#else
unreached();
unreached();
#endif
}
}

memcpy(&result->u8[0], &mask, sizeof(uint64_t));
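A standalone sketch of the two mask layouts handled above, assuming 4-byte elements in a 16-byte vector (VectorToMaskBits is a hypothetical helper, not the templated JIT routine):

#include <cstdint>
#include <cstring>

// xarch packs one bit per element, taken from the element's most significant bit, into
// consecutive positions; Arm64 keeps one bit per byte, so lane i sets the bit at
// position i * sizeof(element) whenever the whole element is non-zero.
static uint64_t VectorToMaskBits(const uint8_t vec[16], bool arm64Layout)
{
    uint64_t mask = 0;

    for (uint32_t i = 0; i < 4; i++)
    {
        uint32_t elem;
        std::memcpy(&elem, &vec[i * sizeof(uint32_t)], sizeof(uint32_t));

        if (arm64Layout)
        {
            if (elem != 0)
            {
                mask |= static_cast<uint64_t>(1) << (i * 4); // bits 0, 4, 8, 12
            }
        }
        else if ((elem & 0x80000000u) != 0)
        {
            mask |= static_cast<uint64_t>(1) << i; // bits 0, 1, 2, 3
        }
    }
    return mask;
}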
24 changes: 24 additions & 0 deletions src/coreclr/jit/valuenum.cpp
@@ -9145,6 +9145,30 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(
{
// (y & x) | (z & ~x)

#if defined(TARGET_ARM64)
if (ni == NI_Sve_ConditionalSelect)
{
assert(TypeOfVN(arg0VN) == TYP_MASK);
assert(type == TYP_SIMD16);

ValueNum maskVNSimd = EvaluateSimdCvtMaskToVector(this, type, baseType, arg0VN);
simd16_t maskVal = ::GetConstantSimd16(this, baseType, maskVNSimd);

simd16_t arg1 = ::GetConstantSimd16(this, baseType, arg1VN);
simd16_t arg2 = ::GetConstantSimd16(this, baseType, arg2VN);

simd16_t result = {};
EvaluateBinarySimd<simd16_t>(GT_AND, false, baseType, &result, arg1, maskVal);
ValueNum trueVN = VNForSimd16Con(result);

result = {};
EvaluateBinarySimd<simd16_t>(GT_AND_NOT, false, baseType, &result, arg2, maskVal);
ValueNum falseVN = VNForSimd16Con(result);

return EvaluateBinarySimd(this, GT_OR, false, type, baseType, trueVN, falseVN);
}
#endif // TARGET_ARM64

ValueNum trueVN = EvaluateBinarySimd(this, GT_AND, false, type, baseType, arg1VN, arg0VN);
ValueNum falseVN = EvaluateBinarySimd(this, GT_AND_NOT, false, type, baseType, arg2VN, arg0VN);

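A minimal standalone model of the SVE path above (EvalSveCselConst is a hypothetical name; 4-byte lanes assumed): the predicate is widened to an all-ones/all-zeros lane mask, then the usual (trueVal & mask) | (falseVal & ~mask) fold is applied.

#include <cstdint>

// The predicate bit for lane i sits at bit i * 4 for 4-byte elements; widening it to
// 0xFFFFFFFF / 0 models EvaluateSimdCvtMaskToVector, and the select mirrors the
// GT_AND / GT_AND_NOT / GT_OR evaluation above.
static void EvalSveCselConst(uint64_t predBits, const uint32_t whenTrue[4],
                             const uint32_t whenFalse[4], uint32_t result[4])
{
    for (int lane = 0; lane < 4; lane++)
    {
        const uint32_t laneMask = ((predBits >> (lane * 4)) & 1) != 0 ? 0xFFFFFFFFu : 0u;
        result[lane] = (whenTrue[lane] & laneMask) | (whenFalse[lane] & ~laneMask);
    }
}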