Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

proof of concept #90058

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dotnet.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash

set -x
source="${BASH_SOURCE[0]}"
# resolve $SOURCE until the file is no longer a symlink
while [[ -h $source ]]; do
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4746,6 +4746,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals);

// Tail duplication
DoPhase(this, PHASE_TAIL_DUP, &Compiler::fgTailDuplicate);

// Do an early pass of liveness for forward sub and morph. This data is
// valid until after morph.
//
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5521,6 +5521,10 @@ class Compiler

PhaseStatus fgTailMerge();

PhaseStatus fgTailDuplicate();
bool fgTailDuplicateBlock(BasicBlock* const block);
bool fgLocalIsConstantOut(GenTreeLclVarCommon* lcl, BasicBlock* const block, unsigned limit = 10);

enum FG_RELOCATE_TYPE
{
FG_RELOCATE_TRY, // relocate the 'try' region
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1, f
CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", false, -1, false)
#endif // FEATURE_EH_FUNCLETS
CompPhaseNameMacro(PHASE_TAIL_MERGE, "Tail merge", false, -1, false)
CompPhaseNameMacro(PHASE_TAIL_DUP, "Tail duplicate", false, -1, false)
CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", false, -1, false)
CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false)
CompPhaseNameMacro(PHASE_TAIL_MERGE2, "Post-morph tail merge", false, -1, false)
Expand Down
334 changes: 334 additions & 0 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7039,3 +7039,337 @@ PhaseStatus Compiler::fgTailMerge()

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//------------------------------------------------------------------------
// fgTailDuplicate: duplicate sequences of statements in block predecessors
//
// Returns:
// Suitable phase status.
//
// Notes:
// Looks for cases where moving statements from a block to its predecessor
// is likely to result in greatly simplified computations.
//
// Similar in spirit to fgOptimizeUncondBranchToSimpleCond but more general
// as it considers a wider set of computations (not just relops feeding branches).
//
// Ideally we'd do this with SSA available, but our inability to handle
// side effects and to update SSA makes that too challenging.
//
PhaseStatus Compiler::fgTailDuplicate()
{
if (!opts.OptimizationEnabled())
{
return PhaseStatus::MODIFIED_NOTHING;
}

const bool isEnabled = JitConfig.JitEnableTailDuplication() > 0;
if (!isEnabled)
{
JITDUMP("Tail duplication disabled by JitEnableTailDuplication\n");
return PhaseStatus::MODIFIED_NOTHING;
}

// Must be run when local threading is enabled...
//
assert(fgNodeThreading == NodeThreading::AllLocals);

bool madeChanges = false;

// Visit each block
//
for (BasicBlock* const block : Blocks())
{
madeChanges |= fgTailDuplicateBlock(block);
}

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

bool Compiler::fgTailDuplicateBlock(BasicBlock* const block)
{
// We only consider blocks with muliple preds without
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// We only consider blocks with muliple preds without
// We only consider blocks with multiple preds without

// any critical edges.
//
// We could handle critical edges by splitting.
//
unsigned numPreds = 0;

for (BasicBlock* const predBlock : block->PredBlocks())
{
if (predBlock->GetUniqueSucc() != block)
{
// critical edge
return false;
}

if (predBlock->isBBCallAlwaysPairTail())
{
// this block must remain empty
return false;
}

numPreds++;
}

if (numPreds < 2)
{
return false;
}

if (block->bbJumpKind == BBJ_RETURN)
{
if (!compMethodHasRetVal())
{
return false;
}

if (compMethodReturnsRetBufAddr())
{
return false;
}

if (genActualType(info.compRetType) == TYP_STRUCT)
{
return false;
}
}

JITDUMP("\nConsidering tail dup in " FMT_BB ": has %u preds\n", block->bbNum, numPreds);

bool madeChanges = false;

// Walk statements from first.
//
Statement* const lastStmt = block->lastStmt();
for (Statement* stmt : block->Statements())
{
JITDUMP("\nScanning " FMT_STMT "\n", stmt->GetID());
unsigned numLocal = 0;
unsigned numAllConstant = 0;
unsigned numSomeConstant = 0;
for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
{
if (lcl->OperIsLocalStore())
{
continue;
}

JITDUMP("... checking [%06u] V%02u\n", dspTreeID(lcl), lcl->GetLclNum());
numLocal++;
unsigned numConstantThisLocal = 0;
for (BasicBlock* const predBlock : block->PredBlocks())
{
// todo: possibly memoize this if we have repeated uses
// of the same local in this or across statements.
//
if (fgLocalIsConstantOut(lcl, predBlock))
{
numConstantThisLocal++;
}
}

if (numConstantThisLocal == numPreds)
{
numAllConstant++;
}
else if (numConstantThisLocal > 0)
{
numSomeConstant++;
}
}

if (numLocal == 0)
{
// Todo: consider allowing reordering stmts...
break;
}

bool tailDuplicate = false;

if (numAllConstant == numLocal)
{
// todo: cost checks... (heuristic)
JITDUMP("All %u locals are constant in all preds, duplicating " FMT_STMT "\n", numLocal, stmt->GetID());
tailDuplicate = true;
}
else if (numAllConstant > 0)
{
// Could handle this case with a more stringent cost check
JITDUMP(FMT_STMT " %u of the %u locals are constant in all preds\n", stmt->GetID(), numAllConstant,
numLocal);
}
else if (numSomeConstant > 0)
{
// Could handle this case with an even more stringent cost check
JITDUMP(FMT_STMT " %u of the %u locals are constant in some preds\n", stmt->GetID(), numSomeConstant,
numLocal);
}

// leave last statement alone for branches and such.
//
if ((stmt == lastStmt) && !(block->KindIs(BBJ_ALWAYS, BBJ_NONE, BBJ_RETURN)))
{
// if there are constants here we could split the computation off and hoist it like
// we do for returns... not clear if this is better or not.
//
JITDUMP(FMT_STMT " has control flow impact\n", stmt->GetID());
tailDuplicate = false;
}

if (!tailDuplicate)
{
// We didn't duplicate this statement, so we can't duplicate any of the ones that follow.
break;
}

if (tailDuplicate)
{
JITDUMP(" ...duplicating " FMT_STMT "\n", numLocal, stmt->GetID());
// If stmt is GT_RETURN, we need to spill to a new temp, and then dup the spill.
//
bool deleteStmt = true;

if (stmt->GetRootNode()->OperIs(GT_RETURN))
{
assert(compMethodHasRetVal());
GenTree* const retNode = stmt->GetRootNode();
GenTree* const retVal = retNode->AsOp()->gtOp1;
unsigned const retLclNum = lvaGrabTemp(true DEBUGARG("Tail dup return value temp"));
var_types const retLclType = info.compRetType;

lvaGetDesc(retLclNum)->lvType = retLclType;
GenTree* const retTemp = gtNewLclvNode(retLclNum, retLclType);
retTemp->gtFlags |= GTF_DONT_CSE;
retNode->AsOp()->gtOp1 = retTemp;
fgSequenceLocals(stmt);
gtUpdateStmtSideEffects(stmt);

GenTree* const retStore = gtNewTempStore(retLclNum, retVal);
stmt = gtNewStmt(retStore);
fgSequenceLocals(stmt);
deleteStmt = false;
}

for (BasicBlock* const predBlock : block->PredBlocks())
{
GenTree* const clone = gtCloneExpr(stmt->GetRootNode());
noway_assert(clone);
Statement* const cloneStmt = gtNewStmt(clone);
fgInsertStmtAtEnd(predBlock, cloneStmt);
fgSequenceLocals(cloneStmt);
madeChanges = true;

for (GenTreeLclVarCommon* clonedLcl : cloneStmt->LocalsTreeList())
{
LclVarDsc* const clonedLclVarDsc = lvaGetDesc(clonedLcl);
clonedLclVarDsc->incLvRefCntSaturating(1, RCS_EARLY);
}
}

if (deleteStmt)
{
for (GenTreeLclVarCommon* origLcl : stmt->LocalsTreeList())
{
LclVarDsc* const origLclVarDsc = lvaGetDesc(origLcl);
unsigned short refCnt = origLclVarDsc->lvRefCnt(RCS_EARLY);
assert(refCnt > 0);
origLclVarDsc->setLvRefCnt(refCnt - 1, RCS_EARLY);
}

fgRemoveStmt(block, stmt);
}
else
{
// Next statement is now the modified return, leave it as is.
//
break;
}
}

if (stmt == lastStmt)
{
break;
}
}

return madeChanges;
}

// very similar to fgBlockEndFavorsTailDuplication
bool Compiler::fgLocalIsConstantOut(GenTreeLclVarCommon* lcl, BasicBlock* const block, unsigned limit)
{
Statement* const lastStmt = block->lastStmt();
Statement* const firstStmt = block->FirstNonPhiDef();

// Check up to limit statements...
//
unsigned count = 0;

if (lastStmt != nullptr)
{
Statement* stmt = lastStmt;

while (count < limit)
{
count++;

if ((stmt->GetRootNode()->gtFlags & GTF_ASG) != 0)
{
JITDUMP("Checking " FMT_STMT " for constant def of V%02u\n", stmt->GetID(), lcl->GetLclNum());

for (GenTreeLclVarCommon* tree : stmt->LocalsTreeList())
{
if (tree->OperIsLocalStore() && !tree->OperIsBlkOp() && (tree->GetLclNum() == lcl->GetLclNum()))
{
GenTree* const data = tree->Data();
if (data->OperIsConst() || data->OperIs(GT_LCL_ADDR))
{
JITDUMP("[%06u] constant\n", dspTreeID(data));
return true;
}
else if (data->OperIsLocal())
{
lcl = data->AsLclVarCommon();
JITDUMP("Chaining to new local V%02u\n", lcl->GetLclNum());
}
else
{
JITDUMP("[%06u] not constant or local\n", dspTreeID(data));
return false;
}
}
}
}

Statement* const prevStmt = stmt->GetPrevStmt();

// The statement list prev links wrap from first->last, so exit
// when we see lastStmt again, as we've now seen all statements.
//
if (prevStmt == lastStmt)
{
break;
}

stmt = prevStmt;
}

if (count == limit)
{
return false;
}
}

BasicBlock* const pred = block->GetUniquePred(this);

// Walk back in flow
//
if (pred != nullptr)
{
JITDUMP("Chaining back to " FMT_BB " remaining limit %u\n", pred->bbNum, limit - count);
return fgLocalIsConstantOut(lcl, pred, limit - count);
}

return false;
}
3 changes: 3 additions & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,9 @@ CONFIG_INTEGER(JitCFGUseDispatcher, W("JitCFGUseDispatcher"), 2)
// Enable tail merging
CONFIG_INTEGER(JitEnableTailMerge, W("JitEnableTailMerge"), 1)

// Enable tail duplication
CONFIG_INTEGER(JitEnableTailDuplication, W("JitEnableTailDuplication"), 1)

// Enable physical promotion
CONFIG_INTEGER(JitEnablePhysicalPromotion, W("JitEnablePhysicalPromotion"), 1)

Expand Down
Loading