Skip to content
33 changes: 31 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29021,6 +29021,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_AndMask:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_And:
case NI_Sve_And:
#endif
{
return GT_AND;
Expand All @@ -29030,6 +29031,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_NotMask:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Not:
case NI_Sve_Not:
#endif
{
return GT_NOT;
Expand All @@ -29043,6 +29045,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_XorMask:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Xor:
case NI_Sve_Xor:
#endif
{
return GT_XOR;
Expand All @@ -29056,6 +29059,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_OrMask:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Or:
case NI_Sve_Or:
#endif
{
return GT_OR;
Expand All @@ -29069,6 +29073,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_AndNotMask:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_BitwiseClear:
case NI_Sve_BitwiseClear:
#endif
{
return GT_AND_NOT;
Expand All @@ -29082,6 +29087,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Add:
case NI_AdvSimd_Arm64_Add:
case NI_Sve_Add:
#endif
{
return GT_ADD;
Expand Down Expand Up @@ -29113,6 +29119,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_Divide:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Arm64_Divide:
case NI_Sve_Divide:
#endif
{
return GT_DIV;
Expand Down Expand Up @@ -29146,6 +29153,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Multiply:
case NI_AdvSimd_Arm64_Multiply:
case NI_Sve_Multiply:
#endif
{
return GT_MUL;
Expand Down Expand Up @@ -29186,6 +29194,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
#if defined(TARGET_ARM64)
case NI_AdvSimd_Negate:
case NI_AdvSimd_Arm64_Negate:
case NI_Sve_Negate:
{
return GT_NEG;
}
Expand Down Expand Up @@ -29223,6 +29232,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_ShiftLeftLogicalVariable:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_ShiftLeftLogical:
case NI_Sve_ShiftLeftLogical:
#endif
{
return GT_LSH;
Expand All @@ -29247,6 +29257,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_ShiftRightArithmeticVariable:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_ShiftRightArithmetic:
case NI_Sve_ShiftRightArithmetic:
#endif
{
return GT_RSH;
Expand All @@ -29271,6 +29282,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
case NI_AVX512_ShiftRightLogicalVariable:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_ShiftRightLogical:
case NI_Sve_ShiftRightLogical:
#endif
{
return GT_RSZ;
Expand All @@ -29295,6 +29307,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Subtract:
case NI_AdvSimd_Arm64_Subtract:
case NI_Sve_Subtract:
#endif
{
return GT_SUB;
Expand Down Expand Up @@ -31889,6 +31902,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

#ifdef FEATURE_MASKED_HW_INTRINSICS
#ifdef TARGET_XARCH

if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper))
{
// Comparisons that produce masks lead to more verbose trees than
Expand Down Expand Up @@ -32572,9 +32586,9 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
else
{
#if defined(TARGET_XARCH)
if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ))
{
#if defined(TARGET_XARCH)
if (otherNode->TypeIs(TYP_SIMD16))
{
if (!HWIntrinsicInfo::IsVariableShift(ni))
Expand All @@ -32598,8 +32612,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
otherNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, shiftAmount);
}
}
}
#elif defined(TARGET_ARM64)
CorInfoType auxJitType = tree->GetAuxiliaryJitType();
if (auxJitType != CORINFO_TYPE_UNDEF &&
genTypeSize(JITtype2varType(auxJitType)) != genTypeSize(simdBaseType))
{
// Handle the "wide elements" variant of shift, where otherNode is a vector of ulongs,
// which is looped over to read the shift values. The values can safely be narrowed
// to the result type.
assert(auxJitType == CORINFO_TYPE_ULONG);
assert(tree->TypeIs(TYP_SIMD16));

simd16_t result = {};
NarrowSimdLong<simd16_t>(simdBaseType, &result, otherNode->AsVecCon()->gtSimd16Val);
otherNode->AsVecCon()->gtSimd16Val = result;
}
#endif // TARGET_XARCH
}

if (otherNode->IsIntegralConst())
{
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_AdvSimd_BitwiseClear:
case NI_Sve_BitwiseClear:
case NI_Vector64_AndNot:
case NI_Vector128_AndNot:
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ HARDWARE_INTRINSIC(Sve, AddSaturate,
HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation)
HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant)
HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, {INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation)
HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant)
HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant)
HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, {INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Compact, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, {INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation)
Expand Down
63 changes: 63 additions & 0 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -2065,6 +2065,69 @@ SveMaskPattern EvaluateSimdMaskToPattern(var_types baseType, simdmask_t arg0)
}
}
}

//------------------------------------------------------------------------
// NarrowSimdLong: Narrow the unsigned 64-bit lanes of a SIMD value into
//                 TBase-sized elements, saturating values that do not fit.
//
// Template Arguments:
//    TSimd - SIMD value type whose raw bytes are accessible through a `u8` array
//    TBase - element type of the result (the visible callers pass unsigned types)
//
// Arguments:
//    result - [out] receives sizeof(TSimd) / sizeof(TBase) narrowed elements
//    arg0   - the vector to narrow, read as consecutive uint64_t lanes
//
// Notes:
//    Result element `i` is produced from the 64-bit lane of `arg0` that overlaps
//    it, i.e. the lane starting at byte offset (i * sizeof(TBase)) rounded down
//    to a multiple of 8. Several narrow elements therefore share one source lane.
//    A lane value larger than the maximum TBase value is clamped to that maximum.
//
template <typename TSimd, typename TBase>
void NarrowSimdLong(TSimd* result, const TSimd& arg0)
{
    static_assert(sizeof(TBase) <= sizeof(uint64_t), "TBase must not be wider than the 64-bit source lanes");

    // Iterate over every *result* element. Iterating over the number of 64-bit
    // source lanes instead would leave all but the first two elements unwritten.
    const uint32_t count    = sizeof(TSimd) / sizeof(TBase);
    const uint64_t maxValue = static_cast<uint64_t>(static_cast<TBase>(-1));

    for (uint32_t i = 0; i < count; i++)
    {
        // Byte offset of the 64-bit lane that overlaps result element i
        const size_t laneOffset = ((i * sizeof(TBase)) / sizeof(uint64_t)) * sizeof(uint64_t);

        uint64_t input0;
        memcpy(&input0, &arg0.u8[laneOffset], sizeof(uint64_t));

        // Saturate to largest value for TBase
        if (input0 > maxValue)
        {
            input0 = maxValue;
        }

        // Store through a TBase temporary so the low-order bytes are the ones
        // copied, independent of the host byte order.
        const TBase narrowed = static_cast<TBase>(input0);
        memcpy(&result->u8[i * sizeof(TBase)], &narrowed, sizeof(TBase));
    }
}

//------------------------------------------------------------------------
// NarrowSimdLong: Select the element-typed NarrowSimdLong<TSimd, TBase>
//                 instantiation that matches a runtime base type.
//
// Arguments:
//    baseType - element type of the result; only its size class selects the
//               instantiation (1, 2, 4 or 8 byte unsigned element)
//    result   - [out] forwarded unchanged to the typed implementation
//    arg0     - forwarded unchanged to the typed implementation
//
// Notes:
//    Any base type that is not listed below hits unreached().
//
template <typename TSimd>
void NarrowSimdLong(var_types baseType, TSimd* result, const TSimd& arg0)
{
    switch (baseType)
    {
        // 1-byte elements
        case TYP_BYTE:
        case TYP_UBYTE:
            NarrowSimdLong<TSimd, uint8_t>(result, arg0);
            return;

        // 2-byte elements
        case TYP_SHORT:
        case TYP_USHORT:
            NarrowSimdLong<TSimd, uint16_t>(result, arg0);
            return;

        // 4-byte elements
        case TYP_INT:
        case TYP_UINT:
        case TYP_FLOAT:
            NarrowSimdLong<TSimd, uint32_t>(result, arg0);
            return;

        // 8-byte elements
        case TYP_LONG:
        case TYP_ULONG:
        case TYP_DOUBLE:
            NarrowSimdLong<TSimd, uint64_t>(result, arg0);
            return;

        default:
            unreached();
    }
}

#endif // TARGET_ARM64

#endif // FEATURE_MASKED_HW_INTRINSICS
Expand Down
21 changes: 19 additions & 2 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8349,9 +8349,9 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
}
}

#if defined(TARGET_XARCH)
if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ))
{
#if defined(TARGET_XARCH)
if (TypeOfVN(arg1VN) == TYP_SIMD16)
{
if (!HWIntrinsicInfo::IsVariableShift(ni))
Expand All @@ -8377,8 +8377,25 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(
}
}
}
}
#elif defined(TARGET_ARM64)
CorInfoType auxJitType = tree->GetAuxiliaryJitType();
if (auxJitType != CORINFO_TYPE_UNDEF &&
genTypeSize(JITtype2varType(auxJitType)) != genTypeSize(baseType))
{
// Handle the "wide elements" variant of shift, where arg1 is a vector of ulongs,
// which is looped over to read the shift values. The values can safely be narrowed
// to the result type.
assert(auxJitType == CORINFO_TYPE_ULONG);
assert(tree->TypeIs(TYP_SIMD16));

simd16_t arg1 = GetConstantSimd16(arg1VN);

simd16_t result = {};
NarrowSimdLong<simd16_t>(baseType, &result, arg1);
arg1VN = VNForSimd16Con(result);
}
#endif // TARGET_XARCH
}

return EvaluateBinarySimd(this, oper, isScalar, type, baseType, arg0VN, arg1VN);
}
Expand Down
Loading