Skip to content

Commit

Permalink
JIT: Support GT_SELECT on xarch (#78879)
Browse files Browse the repository at this point in the history
* JIT: Support GT_SELECT on xarch

Add support for conditional select to the xarch backend. Contained
relops are not supported yet, so for now if-conversion will only produce
these nodes under stress on xarch.

* Fix optimization

* Support decomposition

* Clean up a bit

* Do not break loongarch build

* Stress

* Fix VN for GT_SELECT

Hit this in a test that uses DOTNET_JitOptRepeat=*.

* Revert "Stress"

This reverts commit c0e9751.

* Run jit-format

* Avoid TP impact in release builds while only enabled under stress

* Add comment

* Sigh

* Port fixes
  • Loading branch information
jakobbotsch authored Dec 1, 2022
1 parent 85a9dfc commit ea060e6
Show file tree
Hide file tree
Showing 18 changed files with 341 additions and 42 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1056,8 +1056,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifdef TARGET_ARM64
void genCodeForConditionalCompare(GenTreeOp* tree, GenCondition prevCond);
void genCodeForContainedCompareChain(GenTree* tree, bool* inchain, GenCondition* prevCond);
void genCodeForSelect(GenTreeConditional* tree);
#endif
void genCodeForSelect(GenTreeOp* select);
void genIntrinsic(GenTree* treeNode);
void genPutArgStk(GenTreePutArgStk* treeNode);
void genPutArgReg(GenTreeOp* tree);
Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4705,16 +4705,18 @@ void CodeGen::genCodeForContainedCompareChain(GenTree* tree, bool* inChain, GenC
// Arguments:
// tree - the node
//
void CodeGen::genCodeForSelect(GenTreeConditional* tree)
void CodeGen::genCodeForSelect(GenTreeOp* tree)
{
emitter* emit = GetEmitter();
assert(tree->OperIs(GT_SELECT));
GenTreeConditional* select = tree->AsConditional();
emitter* emit = GetEmitter();

GenTree* opcond = tree->gtCond;
GenTree* op1 = tree->gtOp1;
GenTree* op2 = tree->gtOp2;
GenTree* opcond = select->gtCond;
GenTree* op1 = select->gtOp1;
GenTree* op2 = select->gtOp2;
var_types op1Type = genActualType(op1->TypeGet());
var_types op2Type = genActualType(op2->TypeGet());
emitAttr attr = emitActualTypeSize(tree->TypeGet());
emitAttr attr = emitActualTypeSize(select->TypeGet());

assert(!op1->isUsedFromMemory());
assert(genTypeSize(op1Type) == genTypeSize(op2Type));
Expand Down
59 changes: 59 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,55 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
}
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node.
//
// Arguments:
// select - the node
//
void CodeGen::genCodeForSelect(GenTreeOp* select)
{
#ifdef TARGET_X86
assert(select->OperIs(GT_SELECT, GT_SELECT_HI));
#else
assert(select->OperIs(GT_SELECT));
#endif

regNumber dstReg = select->GetRegNum();
if (select->OperIs(GT_SELECT))
{
genConsumeReg(select->AsConditional()->gtCond);
}

genConsumeOperands(select);

instruction cmovKind = INS_cmovne;
GenTree* trueVal = select->gtOp1;
GenTree* falseVal = select->gtOp2;

// If the 'true' operand was allocated the same register as the target
// register then flip it to the false value so we can skip a reg-reg mov.
if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
{
std::swap(trueVal, falseVal);
cmovKind = INS_cmove;
}

if (select->OperIs(GT_SELECT))
{
// TODO-CQ: Support contained relops here.
assert(select->AsConditional()->gtCond->isUsedFromReg());

regNumber condReg = select->AsConditional()->gtCond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
}

inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal);

genProduceReg(select);
}

//------------------------------------------------------------------------
// genCodeForBT: Generates code for a GT_BT node.
//
Expand Down Expand Up @@ -1737,6 +1786,16 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genCodeForSetcc(treeNode->AsCC());
break;

case GT_SELECT:
genCodeForSelect(treeNode->AsConditional());
break;

#ifdef TARGET_X86
case GT_SELECT_HI:
genCodeForSelect(treeNode->AsOp());
break;
#endif

case GT_BT:
genCodeForBT(treeNode->AsOp());
break;
Expand Down
48 changes: 48 additions & 0 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
break;
#endif // FEATURE_HW_INTRINSICS

case GT_SELECT:
nextNode = DecomposeSelect(use);
break;

case GT_LOCKADD:
case GT_XORR:
case GT_XAND:
Expand Down Expand Up @@ -1507,6 +1511,50 @@ GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use)
}
}

//------------------------------------------------------------------------
// DecomposeSelect: Decompose 64-bit GT_SELECT into a 32-bit GT_SELECT and
// 32-bit GT_SELECT_HI.
//
// Arguments:
// use - the LIR::Use object for the def that needs to be decomposed.
//
// Return Value:
// The next node to process.
//
GenTree* DecomposeLongs::DecomposeSelect(LIR::Use& use)
{
GenTreeConditional* select = use.Def()->AsConditional();
GenTree* op1 = select->gtOp1;
GenTree* op2 = select->gtOp2;

assert(op1->OperIs(GT_LONG));
assert(op2->OperIs(GT_LONG));

GenTree* loOp1 = op1->gtGetOp1();
GenTree* hiOp1 = op1->gtGetOp2();

GenTree* loOp2 = op2->gtGetOp1();
GenTree* hiOp2 = op2->gtGetOp2();

select->gtType = TYP_INT;
select->gtOp1 = loOp1;
select->gtOp2 = loOp2;

Range().Remove(op1);
Range().Remove(op2);

// Normally GT_SELECT is responsible for evaluating the condition into
// flags, but for the "upper half" we treat the lower GT_SELECT similar to
// other flag producing nodes and reuse them. GT_SELECT_HI is the variant
// that uses existing flags and has no condition as part of it.
select->gtFlags |= GTF_SET_FLAGS;
GenTree* hiSelect = m_compiler->gtNewOperNode(GT_SELECT_HI, TYP_INT, hiOp1, hiOp2);

Range().InsertAfter(select, hiSelect);

return FinalizeDecomposition(use, select, hiSelect, hiSelect);
}

//------------------------------------------------------------------------
// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are
// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/decomposelongs.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class DecomposeLongs
GenTree* DecomposeRotate(LIR::Use& use);
GenTree* DecomposeMul(LIR::Use& use);
GenTree* DecomposeUMod(LIR::Use& use);
GenTree* DecomposeSelect(LIR::Use& use);

#ifdef FEATURE_HW_INTRINSICS
GenTree* DecomposeHWIntrinsic(LIR::Use& use);
Expand Down
26 changes: 22 additions & 4 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12173,9 +12173,11 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
break;

case IF_RRW_ARD:
// Mark the destination register as holding a GCT_BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
// Mark the destination register as holding a GC ref
assert(((id->idGCref() == GCT_BYREF) &&
(ins == INS_add || ins == INS_sub || ins == INS_sub_hide || insIsCMOV(ins))) ||
((id->idGCref() == GCT_GCREF) && insIsCMOV(ins)));
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;

case IF_ARD_RRD:
Expand Down Expand Up @@ -16414,14 +16416,30 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_cwde:
case INS_cmp:
case INS_test:
case INS_cmovo:
case INS_cmovno:
case INS_cmovb:
case INS_cmovae:
case INS_cmove:
case INS_cmovne:
case INS_cmovbe:
case INS_cmova:
case INS_cmovs:
case INS_cmovns:
case INS_cmovp:
case INS_cmovnp:
case INS_cmovl:
case INS_cmovge:
case INS_cmovle:
case INS_cmovg:
if (memFmt == IF_NONE)
{
result.insThroughput = PERFSCORE_THROUGHPUT_4X;
}
else if (memAccessKind == PERFSCORE_MEMORY_READ)
{
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
if (ins == INS_cmp || ins == INS_test)
if (ins == INS_cmp || ins == INS_test || insIsCMOV(ins))
{
result.insLatency += PERFSCORE_LATENCY_1C;
}
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ GTNODE(GT , GenTreeOp ,0,GTK_BINOP)
GTNODE(TEST_EQ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
GTNODE(TEST_NE , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)

// Conditional select with 3 operands: condition, true value, false value
GTNODE(SELECT , GenTreeConditional ,0,GTK_SPECIAL)

GTNODE(COMMA , GenTreeOp ,0,GTK_BINOP|DBK_NOTLIR)
Expand Down Expand Up @@ -183,6 +184,9 @@ GTNODE(SUB_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
// RSH_LO represents the lo operation of a 64-bit right shift by a constant int.
GTNODE(LSH_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
GTNODE(RSH_LO , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)

// Variant of SELECT that reuses flags computed by a previous SELECT.
GTNODE(SELECT_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
#endif // !defined(TARGET_64BIT)

#ifdef FEATURE_SIMD
Expand Down
33 changes: 23 additions & 10 deletions src/coreclr/jit/ifconversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,15 +542,11 @@ void OptIfConversionDsc::IfConvertDump()
//
bool OptIfConversionDsc::optIfConvert()
{
#ifndef TARGET_ARM64
return false;
#endif

// Don't optimise the block if it is inside a loop
// When inside a loop, branches are quicker than selects.
// Detect via the block weight as that will be high when inside a loop.
if ((m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT) &&
!m_comp->compStressCompile(m_comp->STRESS_IF_CONVERSION_INNER_LOOPS, 25))
!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_INNER_LOOPS, 25))
{
return false;
}
Expand Down Expand Up @@ -623,8 +619,14 @@ bool OptIfConversionDsc::optIfConvert()

// Using SELECT nodes means that both Then and Else operations are fully evaluated.
// Put a limit on the original source and destinations.
if (!m_comp->compStressCompile(m_comp->STRESS_IF_CONVERSION_COST, 25))
if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_COST, 25))
{
#ifdef TARGET_XARCH
// xarch does not support containing relops in GT_SELECT nodes
// currently so only introduce GT_SELECT in stress.
JITDUMP("Skipping if-conversion on xarch\n");
return false;
#else
int thenCost = 0;
int elseCost = 0;

Expand Down Expand Up @@ -655,11 +657,13 @@ bool OptIfConversionDsc::optIfConvert()
elseCost);
return false;
}
#endif
}

// Get the select node inputs.
GenTree* selectTrueInput;
GenTree* selectFalseInput;
var_types selectType;
GenTree* selectTrueInput;
GenTree* selectFalseInput;
if (m_mainOper == GT_ASG)
{
if (m_doElseConversion)
Expand All @@ -680,6 +684,9 @@ bool OptIfConversionDsc::optIfConvert()

selectTrueInput = m_thenOperation.node->gtGetOp2();
}

// Pick the type as the type of the local, which should always be compatible even for implicit coercions.
selectType = genActualType(m_thenOperation.node->gtGetOp1());
}
else
{
Expand All @@ -689,11 +696,12 @@ bool OptIfConversionDsc::optIfConvert()

selectTrueInput = m_elseOperation.node->gtGetOp1();
selectFalseInput = m_thenOperation.node->gtGetOp1();
selectType = genActualType(m_thenOperation.node);
}

// Create a select node.
GenTreeConditional* select = m_comp->gtNewConditionalNode(GT_SELECT, m_cond, selectTrueInput, selectFalseInput,
m_thenOperation.node->TypeGet());
GenTreeConditional* select =
m_comp->gtNewConditionalNode(GT_SELECT, m_cond, selectTrueInput, selectFalseInput, selectType);
m_thenOperation.node->AsOp()->gtFlags |= (select->gtFlags & GTF_ALL_EFFECT);

// Use the select as the source of the Then operation.
Expand Down Expand Up @@ -766,6 +774,10 @@ PhaseStatus Compiler::optIfConversion()
// This phase does not repect SSA: assignments are deleted/moved.
assert(!fgDomsComputed);

// Currently only enabled on arm64 and under debug on xarch, since we only
// do it under stress.
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_ARM64) || (defined(TARGET_XARCH) && defined(DEBUG))
// Reverse iterate through the blocks.
BasicBlock* block = fgLastBB;
while (block != nullptr)
Expand All @@ -774,6 +786,7 @@ PhaseStatus Compiler::optIfConversion()
madeChanges |= optIfConversionDsc.optIfConvert();
block = block->bbPrev;
}
#endif

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
10 changes: 6 additions & 4 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,15 @@ GenTree* Lowering::LowerNode(GenTree* node)
return LowerJTrue(node->AsOp());

case GT_SELECT:
#ifdef TARGET_ARM64
ContainCheckSelect(node->AsConditional());
#endif
break;

#ifdef TARGET_X86
case GT_SELECT_HI:
ContainCheckSelect(node->AsOp());
break;
#endif

case GT_JMP:
LowerJmpMethod(node);
break;
Expand Down Expand Up @@ -6993,9 +6997,7 @@ void Lowering::ContainCheckNode(GenTree* node)
break;

case GT_SELECT:
#ifdef TARGET_ARM64
ContainCheckSelect(node->AsConditional());
#endif
break;

case GT_ADD:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class Lowering final : public Phase
bool ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree** earliestValid);
void ContainCheckCompareChainForAnd(GenTree* tree);
void ContainCheckConditionalCompare(GenTreeOp* cmp);
void ContainCheckSelect(GenTreeConditional* node);
#endif
void ContainCheckSelect(GenTreeOp* select);
void ContainCheckBitCast(GenTree* node);
void ContainCheckCallOperands(GenTreeCall* call);
void ContainCheckIndir(GenTreeIndir* indirNode);
Expand Down
Loading

0 comments on commit ea060e6

Please sign in to comment.