Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3470,6 +3470,45 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

// Special import for Sve2.AddSaturate.
// When the two operands have the same base type the intrinsic ID is left as NI_Sve2_AddSaturate.
// When the base types differ, the ID is switched to NI_Sve2_AddSaturateWithSignedAddend or
// NI_Sve2_AddSaturateWithUnsignedAddend before the HWIntrinsic node is created.
case NI_Sve2_AddSaturate:
{
assert(sig->numArgs == 2);
assert(retType != TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

// argClass is an out-parameter that is overwritten by each getArgType call, so the base JIT type
// of each operand is captured immediately after the corresponding call.
// NOTE(review): argType1 and argType2 are not referenced again within this case.
var_types argType1 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
CorInfoType op1BaseJitType = getBaseJitTypeOfSIMDType(argClass);
var_types argType2 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass);
// Only the first operand's base type is checked against simdBaseType here.
assert(JitType2PreciseVarType(op1BaseJitType) == simdBaseType);

// Operands are popped in reverse order: op2 first, then op1.
op2 = impPopStack().val;
op1 = impPopStack().val;

// NI_Sve2_AddSaturate has the HW_Flag_OptionalEmbeddedMaskedOperation flag so that it can fall back
// to the unpredicated AddSaturate instruction, while NI_Sve2_AddSaturateWithSignedAddend and
// NI_Sve2_AddSaturateWithUnsignedAddend are always predicated (HW_Flag_EmbeddedMaskedOperation).
// Therefore, separate HWIntrinsic IDs are used here instead of emitting the corresponding
// instructions in codegen under the same HWIntrinsic ID.

// Differing base types select one of the mixed-signedness intrinsics.
if (op1BaseJitType != op2BaseJitType)
Comment thread
ylpoonlg marked this conversation as resolved.
Outdated
{
// The choice is keyed on simdBaseType, which the assert above ties to op1's base type.
if (varTypeIsUnsigned(simdBaseType))
{
intrinsic = NI_Sve2_AddSaturateWithSignedAddend;
}
else
{
intrinsic = NI_Sve2_AddSaturateWithUnsignedAddend;
}
}

// Create the two-operand node using the (possibly replaced) intrinsic ID.
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize);
break;
}

default:
{
return nullptr;
Expand Down
7 changes: 4 additions & 3 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,8 @@ HARDWARE_INTRINSIC(Sve2, AddPairwiseWideningAndAdd,
HARDWARE_INTRINSIC(Sve2, AddRotateComplex, -1, 3, {INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingEven, -1, 2, {INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingOdd, -1, 3, {INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Sve2, AddSaturateRotateComplex, -1, 3, {INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddSaturateWithSignedAddend, -1, -1, {INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddSaturateWithUnsignedAddend, -1, -1, {INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddWideningEvenOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AddWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
Expand Down Expand Up @@ -486,6 +484,9 @@ HARDWARE_INTRINSIC(Sve, UnzipOdd_Predicates,
HARDWARE_INTRINSIC(Sve, TransposeEven_Predicates, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, TransposeOdd_Predicates, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
// Add saturate with signed/unsigned addend
HARDWARE_INTRINSIC(Sve2, AddSaturateWithSignedAddend, -1, -1, {INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddSaturateWithUnsignedAddend, -1, -1, {INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)

#endif // FEATURE_HW_INTRINSIC

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -723,32 +723,6 @@ internal Arm64() { }
/// </summary>
public static new Vector<ulong> AddSaturate(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

// Saturating complex add with rotate

/// <summary>
/// svint16_t svqcadd[_s16](svint16_t op1, svint16_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.H, Ztied1.H, Zop2.H, #imm_rotation
/// </summary>
public static Vector<short> AddSaturateRotateComplex(Vector<short> left, Vector<short> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svqcadd[_s32](svint32_t op1, svint32_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.S, Ztied1.S, Zop2.S, #imm_rotation
/// </summary>
public static Vector<int> AddSaturateRotateComplex(Vector<int> left, Vector<int> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svqcadd[_s64](svint64_t op1, svint64_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.D, Ztied1.D, Zop2.D, #imm_rotation
/// </summary>
public static Vector<long> AddSaturateRotateComplex(Vector<long> left, Vector<long> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svqcadd[_s8](svint8_t op1, svint8_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.B, Ztied1.B, Zop2.B, #imm_rotation
/// </summary>
public static Vector<sbyte> AddSaturateRotateComplex(Vector<sbyte> left, Vector<sbyte> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

// Saturating add with signed addend

/// <summary>
Expand All @@ -758,7 +732,7 @@ internal Arm64() { }
/// USQADD Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// USQADD Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// </summary>
public static Vector<byte> AddSaturateWithSignedAddend(Vector<byte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }
public static Vector<byte> AddSaturate(Vector<byte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svsqadd[_u16]_m(svbool_t pg, svuint16_t op1, svint16_t op2)
Expand All @@ -767,7 +741,7 @@ internal Arm64() { }
/// USQADD Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// USQADD Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// </summary>
public static Vector<ushort> AddSaturateWithSignedAddend(Vector<ushort> left, Vector<short> right) { throw new PlatformNotSupportedException(); }
public static Vector<ushort> AddSaturate(Vector<ushort> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svsqadd[_u32]_m(svbool_t pg, svuint32_t op1, svint32_t op2)
Expand All @@ -776,7 +750,7 @@ internal Arm64() { }
/// USQADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// USQADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// </summary>
public static Vector<uint> AddSaturateWithSignedAddend(Vector<uint> left, Vector<int> right) { throw new PlatformNotSupportedException(); }
public static Vector<uint> AddSaturate(Vector<uint> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svsqadd[_u64]_m(svbool_t pg, svuint64_t op1, svint64_t op2)
Expand All @@ -785,7 +759,7 @@ internal Arm64() { }
/// USQADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// USQADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// </summary>
public static Vector<ulong> AddSaturateWithSignedAddend(Vector<ulong> left, Vector<long> right) { throw new PlatformNotSupportedException(); }
public static Vector<ulong> AddSaturate(Vector<ulong> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

// Saturating add with unsigned addend

Expand All @@ -796,7 +770,7 @@ internal Arm64() { }
/// SUQADD Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// SUQADD Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// </summary>
public static Vector<sbyte> AddSaturateWithUnsignedAddend(Vector<sbyte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }
public static Vector<sbyte> AddSaturate(Vector<sbyte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svuqadd[_s16]_m(svbool_t pg, svint16_t op1, svuint16_t op2)
Expand All @@ -805,7 +779,7 @@ internal Arm64() { }
/// SUQADD Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// SUQADD Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// </summary>
public static Vector<short> AddSaturateWithUnsignedAddend(Vector<short> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }
public static Vector<short> AddSaturate(Vector<short> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svuqadd[_s32]_m(svbool_t pg, svint32_t op1, svuint32_t op2)
Expand All @@ -814,7 +788,7 @@ internal Arm64() { }
/// SUQADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// SUQADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// </summary>
public static Vector<int> AddSaturateWithUnsignedAddend(Vector<int> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }
public static Vector<int> AddSaturate(Vector<int> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svuqadd[_s64]_m(svbool_t pg, svint64_t op1, svuint64_t op2)
Expand All @@ -823,7 +797,33 @@ internal Arm64() { }
/// SUQADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// SUQADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// </summary>
public static Vector<long> AddSaturateWithUnsignedAddend(Vector<long> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
public static Vector<long> AddSaturate(Vector<long> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

// Saturating complex add with rotate

/// <summary>
/// svint16_t svqcadd[_s16](svint16_t op1, svint16_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.H, Ztied1.H, Zop2.H, #imm_rotation
/// </summary>
public static Vector<short> AddSaturateRotateComplex(Vector<short> left, Vector<short> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svqcadd[_s32](svint32_t op1, svint32_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.S, Ztied1.S, Zop2.S, #imm_rotation
/// </summary>
public static Vector<int> AddSaturateRotateComplex(Vector<int> left, Vector<int> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svqcadd[_s64](svint64_t op1, svint64_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.D, Ztied1.D, Zop2.D, #imm_rotation
/// </summary>
public static Vector<long> AddSaturateRotateComplex(Vector<long> left, Vector<long> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svqcadd[_s8](svint8_t op1, svint8_t op2, uint64_t imm_rotation)
/// SQCADD Ztied1.B, Ztied1.B, Zop2.B, #imm_rotation
/// </summary>
public static Vector<sbyte> AddSaturateRotateComplex(Vector<sbyte> left, Vector<sbyte> right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); }

// Add wide (bottom)

Expand Down
Loading
Loading