Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,15 +840,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case 3:
if (isRMW)
{
if (targetReg != op1Reg)
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);
if (targetReg != op2Reg)
{
assert(targetReg != op1Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
/* canSkip */ true);
}

GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt);
}
else
{
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
else
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ HARDWARE_INTRINSIC(Sve, PrefetchBytes,
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, ReverseBits, -1, -1, false, {INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -184,6 +185,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Splice, -1, 3, true, {INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
Expand Down
37 changes: 33 additions & 4 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler);

bool tgtPrefOp1 = false;
bool op2IsTarget = (isRMW && HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id));
bool delayFreeMultiple = false;
if (intrin.op1 != nullptr)
{
Expand Down Expand Up @@ -1568,7 +1569,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
// we want to preference op1Reg to the target if op1 is not contained.
if (isRMW || simdRegToSimdRegMove)

if ((isRMW || simdRegToSimdRegMove) && !HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
tgtPrefOp1 = !intrin.op1->isContained();
}
Expand Down Expand Up @@ -1617,7 +1619,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
predMask = RBM_LOWMASK.GetPredicateRegSet();
}

srcCount += BuildOperandUses(intrin.op1, predMask);
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id) && isRMW)
{
srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2, predMask);
}
else
{
srcCount += BuildOperandUses(intrin.op1, predMask);
}
}
}
else if (intrinsicTree->OperIsMemoryLoadOrStore())
Expand Down Expand Up @@ -1978,6 +1987,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (op2IsTarget)
{
if (!intrin.op2->isContained())
{
tgtPrefUse = BuildUse(intrin.op2);
srcCount++;
}
else
{
srcCount += BuildOperandUses(intrin.op2);
}
}
else
{
switch (intrin.id)
Expand Down Expand Up @@ -2021,12 +2042,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE;

srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
: BuildOperandUses(intrin.op3, candidates);
if (op2IsTarget)
{
srcCount += BuildDelayFreeUses(intrin.op3, intrin.op2, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
: BuildOperandUses(intrin.op3, candidates);
}

if (intrin.op4 != nullptr)
{
assert(lowVectorOperandNum != 4);
assert(!op2IsTarget);
srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4207,6 +4207,57 @@ internal Arm64() { }
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }


/// Reverse bits

/// <summary>
/// svuint8_t svrbit[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
/// RBIT Ztied.B, Pg/M, Zop.B
/// </summary>
public static unsafe Vector<byte> ReverseBits(Vector<byte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svrbit[_s16]_m(svint16_t inactive, svbool_t pg, svint16_t op)
/// RBIT Ztied.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<short> ReverseBits(Vector<short> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svrbit[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
/// RBIT Ztied.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<int> ReverseBits(Vector<int> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svrbit[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
/// RBIT Ztied.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<long> ReverseBits(Vector<long> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svrbit[_s8]_m(svint8_t inactive, svbool_t pg, svint8_t op)
/// RBIT Ztied.B, Pg/M, Zop.B
/// </summary>
public static unsafe Vector<sbyte> ReverseBits(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svrbit[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
/// RBIT Ztied.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<ushort> ReverseBits(Vector<ushort> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svrbit[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
/// RBIT Ztied.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<uint> ReverseBits(Vector<uint> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svrbit[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
/// RBIT Ztied.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<ulong> ReverseBits(Vector<ulong> value) { throw new PlatformNotSupportedException(); }


/// Reverse all elements

/// <summary>
Expand Down Expand Up @@ -5002,6 +5053,69 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// Splice two vectors under predicate control

/// <summary>
/// svuint8_t svsplice[_u8](svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> Splice(Vector<byte> mask, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svsplice[_f64](svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Splice(Vector<double> mask, Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svsplice[_s16](svbool_t pg, svint16_t op1, svint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> Splice(Vector<short> mask, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svsplice[_s32](svbool_t pg, svint32_t op1, svint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> Splice(Vector<int> mask, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svsplice[_s64](svbool_t pg, svint64_t op1, svint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> Splice(Vector<long> mask, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svsplice[_s8](svbool_t pg, svint8_t op1, svint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> Splice(Vector<sbyte> mask, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svsplice[_f32](svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Splice(Vector<float> mask, Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svsplice[_u16](svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> Splice(Vector<ushort> mask, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svsplice[_u32](svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> Splice(Vector<uint> mask, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svsplice[_u64](svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> Splice(Vector<ulong> mask, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Non-truncating store

/// <summary>
Expand Down
Loading