Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] More ARM64 comparison instruction optimizations with Vector.Zero #64783

Merged
merged 15 commits into from
Feb 17, 2022
7 changes: 4 additions & 3 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12971,7 +12971,7 @@ void emitter::emitDispIns(
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
}
if (ins == INS_fcmeq)
if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt)
{
printf(", ");
emitDispImm(0, false);
Expand All @@ -12995,7 +12995,7 @@ void emitter::emitDispIns(
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
}
if (ins == INS_cmeq)
if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it's worth a small helper to cover these 5 instructions since they get grouped together a few times

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, having the helper would be nice.
But this can be done in a follow up PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, this is the jit coding convention around subexpressions (including Boolean expressions)
https://github.com/dotnet/runtime/blob/main/docs/coding-guidelines/clr-jit-coding-conventions.md#11.1

There should be parentheses around all unary and binary expressions if they are contained within other expressions.

{
printf(", ");
emitDispImm(0, false);
Expand Down Expand Up @@ -13136,7 +13136,8 @@ void emitter::emitDispIns(
emitDispReg(id->idReg1(), size, true);
emitDispReg(id->idReg2(), size, false);
}
if (fmt == IF_DV_2L && ins == INS_cmeq)
if (fmt == IF_DV_2L &&
(ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt))
{
printf(", ");
emitDispImm(0, false);
Expand Down
63 changes: 39 additions & 24 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,45 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(!hasImmediateOperand);

switch (intrin.numOperands)
size_t numOperands = intrin.numOperands;

// This handles optimizations for instructions that have
// an implicit 'zero' vector of what would be the second operand.
if ((numOperands == 2) &&
((intrin.op2->IsVectorZero() && intrin.op2->isContained()) ||
(intrin.op1->IsVectorZero() && intrin.op1->isContained() &&
HWIntrinsicInfo::IsCommutative(intrin.id))))
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));

if (intrin.op1->IsVectorZero() && intrin.op1->isContained() &&
HWIntrinsicInfo::IsCommutative(intrin.id))
{
// The intrinsic is commutative, swap the registers.
assert(op1Reg == REG_NA);
op1Reg = op2Reg;
op2Reg = REG_NA;
}

switch (ins)
{
case INS_cmeq:
case INS_cmge:
case INS_cmgt:
case INS_fcmeq:
case INS_fcmge:
case INS_fcmgt:
{
numOperands -= 1;
TIHan marked this conversation as resolved.
Show resolved Hide resolved
break;
}

default:
unreached();
}
}

switch (numOperands)
{
case 1:
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
Expand Down Expand Up @@ -499,29 +537,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_AdvSimd_CompareEqual:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part was removed in favor of the table driven approach as it was simpler to handle the instructions explicitly rather than the intrinsics.

case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
if (intrin.op1->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op1->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
}
else if (intrin.op2->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op2->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else
{
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
}
break;

case NI_AdvSimd_AbsoluteCompareLessThan:
case NI_AdvSimd_AbsoluteCompareLessThanOrEqual:
case NI_AdvSimd_CompareLessThan:
Expand Down
18 changes: 9 additions & 9 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear,
HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, CompareLessThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, CompareTest, -1, 2, {INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_invalid, INS_invalid, INS_cmtst, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
Expand Down Expand Up @@ -492,12 +492,12 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar,
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
Expand Down
Loading