Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6230,7 +6230,8 @@ class Compiler
#endif // DEBUG

weight_t GetCost(BasicBlock* block, BasicBlock* next);
bool TrySwappingPartitions(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);
weight_t GetPartitionCostDelta(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);
void SwapPartitions(unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End);

void ConsiderEdge(FlowEdge* edge);
void AddNonFallthroughSuccs(unsigned blockPos);
Expand Down
142 changes: 100 additions & 42 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4941,6 +4941,9 @@ weight_t Compiler::ThreeOptLayout::GetLayoutCost(unsigned startPos, unsigned end
// block - The block to consider creating fallthrough from
// next - The block to consider creating fallthrough into
//
// Returns:
// The cost
//
weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
{
assert(block != nullptr);
Expand All @@ -4960,8 +4963,8 @@ weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
}

//-----------------------------------------------------------------------------
// Compiler::ThreeOptLayout::TrySwappingPartitions: Evaluates the cost of swapping the given partitions.
// If it is profitable, write the swapped partitions back to 'blockOrder'.
// Compiler::ThreeOptLayout::GetPartitionCostDelta: Computes the current cost of the given partitions,
// and the cost of swapping S2 and S3, returning the difference between them.
//
// Parameters:
// s1Start - The starting position of the first partition
Expand All @@ -4971,24 +4974,10 @@ weight_t Compiler::ThreeOptLayout::GetCost(BasicBlock* block, BasicBlock* next)
// s4End - The ending position (inclusive) of the fourth partition
//
// Returns:
// True if the swap was performed, false otherwise
//
// Notes:
// Here is the proposed partition:
// S1: s1Start ~ s2Start-1
// S2: s2Start ~ s3Start-1
// S3: s3Start ~ s3End
// S4: remaining blocks
//
// After the swap:
// S1: s1Start ~ s2Start-1
// S3: s3Start ~ s3End
// S2: s2Start ~ s3Start-1
// S4: remaining blocks
// The difference in cost between the current and proposed layouts.
// A negative delta indicates the proposed layout is an improvement.
//
// If 's3End' and 's4End' are the same, the fourth partition doesn't exist.
//
bool Compiler::ThreeOptLayout::TrySwappingPartitions(
weight_t Compiler::ThreeOptLayout::GetPartitionCostDelta(
unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
{
BasicBlock* const s2Block = blockOrder[s2Start];
Expand All @@ -5015,16 +5004,38 @@ bool Compiler::ThreeOptLayout::TrySwappingPartitions(
newCost += s3BlockPrev->bbWeight;
}

// Check if the swap is profitable
if ((newCost >= currCost) || Compiler::fgProfileWeightsEqual(newCost, currCost, 0.001))
{
return false;
}
return newCost - currCost;
}

// We've found a profitable cut point. Continue with the swap.
JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB
"] (current partition cost = %f, new partition cost = %f)\n",
s2Block->bbNum, s3BlockPrev->bbNum, s3Block->bbNum, lastBlock->bbNum, currCost, newCost);
//-----------------------------------------------------------------------------
// Compiler::ThreeOptLayout::SwapPartitions: Swap the specified partitions.
// It is assumed (and asserted) that the swap is profitable.
//
// Parameters:
// s1Start - The starting position of the first partition
// s2Start - The starting position of the second partition
// s3Start - The starting position of the third partition
// s3End - The ending position (inclusive) of the third partition
// s4End - The ending position (inclusive) of the fourth partition
//
// Notes:
// Here is the proposed partition:
// S1: s1Start ~ s2Start-1
// S2: s2Start ~ s3Start-1
// S3: s3Start ~ s3End
// S4: remaining blocks
//
// After the swap:
// S1: s1Start ~ s2Start-1
// S3: s3Start ~ s3End
// S2: s2Start ~ s3Start-1
// S4: remaining blocks
//
// If 's3End' and 's4End' are the same, the fourth partition doesn't exist.
//
void Compiler::ThreeOptLayout::SwapPartitions(
unsigned s1Start, unsigned s2Start, unsigned s3Start, unsigned s3End, unsigned s4End)
{
INDEBUG(const weight_t currLayoutCost = GetLayoutCost(s1Start, s4End));

// Swap the partitions
Expand Down Expand Up @@ -5058,8 +5069,6 @@ bool Compiler::ThreeOptLayout::TrySwappingPartitions(
Compiler::fgProfileWeightsEqual(newLayoutCost, currLayoutCost, 0.001));
}
#endif // DEBUG

return true;
}

//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -5360,6 +5369,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned

const bool isForwardJump = (srcPos < dstPos);
unsigned s2Start, s3Start, s3End;
weight_t costChange;

if (isForwardJump)
{
Expand All @@ -5374,35 +5384,83 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
// S3: dstPos ~ endPos
// S2: srcPos+1 ~ dstPos-1
// S4: remaining blocks
s2Start = srcPos + 1;
s3Start = dstPos;
s3End = endPos;
s2Start = srcPos + 1;
s3Start = dstPos;
s3End = endPos;
costChange = GetPartitionCostDelta(startPos, s2Start, s3Start, s3End, endPos);
}
else
{

// For backward jumps, we will employ a greedy 4-opt approach to find the ideal cut point
// between the destination and source blocks.
// Here is the proposed partition:
// S1: startPos ~ dstPos-1
// S2: dstPos ~ srcPos-1
// S3: srcPos
// S2: dstPos ~ s3Start-1
// S3: s3Start ~ srcPos
// S4: srcPos+1 ~ endPos
//
// After the swap:
// S1: startPos ~ dstPos-1
// S3: srcPos
// S2: dstPos ~ srcPos-1
// S3: s3Start ~ srcPos
// S2: dstPos ~ s3Start-1
// S4: srcPos+1 ~ endPos
s2Start = dstPos;
s3Start = srcPos;
s3End = srcPos;
s2Start = dstPos;
s3Start = srcPos;
s3End = srcPos;
costChange = BB_ZERO_WEIGHT;

// The cut points before S2 and after S3 are fixed.
// We will search for the optimal cut point before S3.
BasicBlock* const s2Block = blockOrder[s2Start];
BasicBlock* const s2BlockPrev = blockOrder[s2Start - 1];
BasicBlock* const lastBlock = blockOrder[s3End];

// Because the above cut points are fixed, don't waste time re-computing their costs.
// Instead, pre-compute them here.
const weight_t currCostBase =
GetCost(s2BlockPrev, s2Block) +
((s3End < endPos) ? GetCost(lastBlock, blockOrder[s3End + 1]) : lastBlock->bbWeight);
const weight_t newCostBase = GetCost(lastBlock, s2Block);

// Search for the ideal start to S3
for (unsigned position = s2Start + 1; position <= s3End; position++)
{
BasicBlock* const s3Block = blockOrder[position];
BasicBlock* const s3BlockPrev = blockOrder[position - 1];

// Don't consider any cut points that would break up call-finally pairs
if (s3Block->KindIs(BBJ_CALLFINALLYRET))
{
continue;
}

// Compute the cost delta of this partition
const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block);
const weight_t newCost =
newCostBase + GetCost(s2BlockPrev, s3Block) +
((s3End < endPos) ? GetCost(s3BlockPrev, blockOrder[s3End + 1]) : s3BlockPrev->bbWeight);
const weight_t delta = newCost - currCost;

if (delta < costChange)
{
costChange = delta;
s3Start = position;
}
}
}

// Continue evaluating partitions if this one isn't profitable
if (!TrySwappingPartitions(startPos, s2Start, s3Start, s3End, endPos))
if ((costChange >= BB_ZERO_WEIGHT) || Compiler::fgProfileWeightsEqual(costChange, BB_ZERO_WEIGHT, 0.001))
{
continue;
}

JITDUMP("Swapping partitions [" FMT_BB ", " FMT_BB "] and [" FMT_BB ", " FMT_BB "] (cost change = %f)\n",
blockOrder[s2Start]->bbNum, blockOrder[s3Start - 1]->bbNum, blockOrder[s3Start]->bbNum,
blockOrder[s3End]->bbNum, costChange);

SwapPartitions(startPos, s2Start, s3Start, s3End, endPos);

// Update the ordinals for the blocks we moved
for (unsigned i = s2Start; i <= endPos; i++)
{
Expand Down
Loading