From 5dbc3d24d0fbf80a9565d082a6ea5524e0c2bd31 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 26 Sep 2025 09:32:59 -0700 Subject: [PATCH 01/13] basics --- src/coreclr/jit/compiler.h | 4 + src/coreclr/jit/jitmetadatalist.h | 1 + src/coreclr/jit/optimizer.cpp | 211 ++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 7d72c67271770b..474c0a9a702bc2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -88,6 +88,7 @@ struct JumpThreadInfo; // defined in redundantbranchopts.cpp class ProfileSynthesis; // defined in profilesynthesis.h class PerLoopInfo; // defined in inductionvariableopts.cpp class RangeCheck; // defined in rangecheck.h +class SCC; #ifdef DEBUG struct IndentStack; #endif @@ -7024,6 +7025,9 @@ class Compiler void optFindLoops(); bool optCanonicalizeLoops(); + void optFindSCCs(); + void optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs); + void optCompactLoops(); void optCompactLoop(FlowGraphNaturalLoop* loop); bool optCreatePreheader(FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 215ea82ead5606..4ad4c69ed712a5 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -95,6 +95,7 @@ JITMETADATAMETRIC(MorphTrackedLocals, int, 0) JITMETADATAMETRIC(MorphLocals, int, 0) JITMETADATAMETRIC(EnumeratorGDVProvisionalNoEscape, int, 0) JITMETADATAMETRIC(EnumeratorGDVCanCloneToEnsureNoEscape, int, 0) +JITMETADATAMETRIC(IrreducibleLoopsFoundDuringOpts, int, 0) #undef JITMETADATA #undef JITMETADATAINFO diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index d538ca44b2413d..b27d63d30fdee4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2366,6 +2366,8 @@ void Compiler::optFindLoops() // loops by removing edges. fgMightHaveNaturalLoops = m_dfsTree->HasCycle(); assert(fgMightHaveNaturalLoops || (m_loops->NumLoops() == 0)); + + optFindSCCs(); } //----------------------------------------------------------------------------- @@ -2855,6 +2857,215 @@ void Compiler::optSetWeightForPreheaderOrExit(FlowGraphNaturalLoop* loop, BasicB } } +//----------------------------------------------------------------------------- +// optFindSCCs: Find strongly connected components. +// +class SCC +{ +private: + + Compiler* m_comp; + BitVecTraits m_traits; + BitVec m_blocks; + BitVec m_entries; + +public: + + // Factor out traits? 
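+    //
+    // Notes: m_blocks is the set of blocks in the SCC, and m_entries the
+    // subset of those blocks that have a predecessor outside the SCC.
+    // A natural (reducible) loop has exactly one entry -- its header --
+    // so an SCC with more than one entry is an irreducible loop. E.g.
+    // given A -> B; B -> A; X -> A; Y -> B; the SCC {A, B} has entries
+    // {A, B} and is irreducible.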
+ // + SCC(Compiler* comp, BasicBlock* block) + : m_comp(comp) + , m_traits(comp->m_dfsTree->GetPostOrderCount(), comp) + , m_blocks(BitVecOps::UninitVal()) + , m_entries(BitVecOps::UninitVal()) + { + m_blocks = BitVecOps::MakeEmpty(&m_traits); + m_entries = BitVecOps::MakeEmpty(&m_traits); + Add(block); + } + + void Add(BasicBlock* block) + { + BitVecOps::AddElemD(&m_traits, m_blocks, block->bbPostorderNum); + } + + void Finalize() + { + ComputeEntries(); + } + + void ComputeEntries() + { + BitVecOps::Iter iterator(&m_traits, m_blocks); + unsigned int poNum; + while (iterator.NextElem(&poNum)) + { + BasicBlock* const block = m_comp->m_dfsTree->GetPostOrder(poNum); + + for (BasicBlock* pred : block->PredBlocks()) + { + if (!BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) + { + BitVecOps::AddElemD(&m_traits, m_entries, block->bbPostorderNum); + break; + } + } + } + } + + unsigned NumEntries() + { + return BitVecOps::Count(&m_traits, m_entries); + } + + unsigned NumBlocks() + { + return BitVecOps::Count(&m_traits, m_blocks); + } + + BitVec InternalBlocks() + { + return BitVecOps::Diff(&m_traits, m_blocks, m_entries); + } + + void Dump(Compiler* comp) + { + JITDUMP("SCC %p:\n", this); + + BitVecOps::Iter iterator(&m_traits, m_blocks); + unsigned int poNum; + bool first = true; + while (iterator.NextElem(&poNum)) + { + BasicBlock* const block = m_comp->m_dfsTree->GetPostOrder(poNum); + bool isEntry = BitVecOps::IsMember(&m_traits, m_entries, poNum); + JITDUMP("%s" FMT_BB "%s", first ? "" : ", ", block->bbNum, isEntry ? "e" : ""); + first = false; + } + + JITDUMP("\n"); + } +}; + +typedef JitHashTable, SCC*> SCCMap; + +void Compiler::optFindSCCs() +{ + BitVecTraits traits(m_dfsTree->GetPostOrderCount(), this); + BitVec allBlocks = BitVecOps::MakeFull(&traits); + + ArrayStack sccs(getAllocator(CMK_DepthFirstSearch)); + optFindSCCs(allBlocks, traits, sccs); + + unsigned numIrreducible = 0; + + if (sccs.Height() > 0) + { + JITDUMP("\n*** SCCs\n"); + + for (int i = 0; i < sccs.Height(); i++) + { + SCC* const scc = sccs.Bottom(i); + scc->Dump(this); + numIrreducible += (scc->NumEntries() > 1); + } + } + else + { + JITDUMP("\n*** No SCCs\n"); + } + + if (numIrreducible > 0) + { + JITDUMP("\n*** %u Irreducible!\n", numIrreducible); + } + + Metrics.IrreducibleLoopsFoundDuringOpts = (int)numIrreducible; +} + +void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs) +{ + // Initially we map a block to a null entry in the map. + // If we then get a second block in that SCC, we allocate an SCC instance. + // + // We probably want to ignore flow through finallies (back ignore preds that are callfinally ret?) + // + SCCMap map(getAllocator(CMK_DepthFirstSearch)); + + auto assign = [=, &map, &sccs, &traits](auto self, BasicBlock* u, BasicBlock* root, BitVec& subset) -> void { + // Ignore blocks not in the subset + // + if (!BitVecOps::IsMember(&traits, subset, u->bbPostorderNum)) + { + return; + } + + // If we've assigned u an scc, no more work needed. 
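+            // (Roots are tried in RPO, so the first member of an SCC to
+            // be visited becomes its root and claims the other members
+            // via the pred walk below -- in effect Kosaraju's algorithm,
+            // with the pred walk serving as the transpose-graph pass.)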
+ // + if (map.Lookup(u)) + { + return; + } + + // Else see if there's an SCC for root + // + SCC* scc = nullptr; + bool found = map.Lookup(root, &scc); + + if (found) + { + assert(u != root); + + if (scc == nullptr) + { + scc = new (this, CMK_DepthFirstSearch) SCC(this, root); + map.Set(root, scc, SCCMap::SetKind::Overwrite); + sccs.Push(scc); + } + + scc->Add(u); + } + + // Indicate we've visited u + // + map.Set(u, scc); + + // Walk u's preds looking for more SCC members + // + for (BasicBlock* p : u->PredBlocks()) + { + if (p->KindIs(BBJ_CALLFINALLYRET)) + { + // Should we find the matching callfinally + // and walk back from there? Should just + // be our bbpred, right? + // + continue; + } + + self(self, p, root, subset); + } + }; + + // proper rdfs here? Seems like recursion might be bad + // + for (unsigned i = 0; i < m_dfsTree->GetPostOrderCount(); i++) + { + unsigned rpoNum = m_dfsTree->GetPostOrderCount() - i - 1; + BasicBlock* const block = m_dfsTree->GetPostOrder(rpoNum); + assign(assign, block, block, subset); + } + + if (sccs.Height() > 0) + { + for (int i = 0; i < sccs.Height(); i++) + { + SCC* const scc = sccs.Bottom(i); + scc->Finalize(); + } + } +} + /***************************************************************************** * * See if the given tree can be computed in the given precision (which must From d3a8ad0960f739053e0a1f5472abde031eedea7a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 26 Sep 2025 11:49:41 -0700 Subject: [PATCH 02/13] generalize --- src/coreclr/jit/jitmetadatalist.h | 1 + src/coreclr/jit/optimizer.cpp | 90 ++++++++++++++++++++++++++----- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 4ad4c69ed712a5..c92d0752bc7041 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -96,6 +96,7 @@ JITMETADATAMETRIC(MorphLocals, int, 0) JITMETADATAMETRIC(EnumeratorGDVProvisionalNoEscape, int, 0) JITMETADATAMETRIC(EnumeratorGDVCanCloneToEnsureNoEscape, int, 0) JITMETADATAMETRIC(IrreducibleLoopsFoundDuringOpts, int, 0) +JITMETADATAMETRIC(IrreducibleNestedLoopsFoundDuringOpts, int, 0) #undef JITMETADATA #undef JITMETADATAINFO diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index b27d63d30fdee4..98dca1e0e0bd18 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2864,20 +2864,24 @@ class SCC { private: - Compiler* m_comp; - BitVecTraits m_traits; - BitVec m_blocks; - BitVec m_entries; + Compiler* m_comp; + BitVecTraits m_traits; + BitVec m_blocks; + BitVec m_entries; + jitstd::vector m_nested; + unsigned m_numIrr; public: - // Factor out traits? + // Factor out traits? Parent links? 
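+    //
+    // m_nested holds the SCCs found among this SCC's non-entry blocks
+    // (see FindNested), so the SCCs form a tree, analogous to a loop
+    // nest.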
// SCC(Compiler* comp, BasicBlock* block) : m_comp(comp) , m_traits(comp->m_dfsTree->GetPostOrderCount(), comp) , m_blocks(BitVecOps::UninitVal()) , m_entries(BitVecOps::UninitVal()) + , m_nested(comp->getAllocator(CMK_DepthFirstSearch)) + , m_numIrr(0) { m_blocks = BitVecOps::MakeEmpty(&m_traits); m_entries = BitVecOps::MakeEmpty(&m_traits); @@ -2892,6 +2896,7 @@ class SCC void Finalize() { ComputeEntries(); + FindNested(); } void ComputeEntries() @@ -2928,22 +2933,79 @@ class SCC return BitVecOps::Diff(&m_traits, m_blocks, m_entries); } - void Dump(Compiler* comp) + bool IsIrr() { - JITDUMP("SCC %p:\n", this); + return NumEntries() > 1; + } + + unsigned NumIrr() + { + m_numIrr = IsIrr(); + + for (SCC* nested : m_nested) + { + m_numIrr += nested->NumIrr(); + } + + return m_numIrr; + } + void Dump(Compiler* comp, int indent = 0) + { BitVecOps::Iter iterator(&m_traits, m_blocks); unsigned int poNum; bool first = true; while (iterator.NextElem(&poNum)) { + if (first) + { + JITDUMP("%*c", indent, ' '); + + if (NumEntries() > 1) + { + JITDUMP("[irrd] "); + } + else + { + JITDUMP("[loop] "); + } + } + else + { + JITDUMP(", "); + } + first = false; + BasicBlock* const block = m_comp->m_dfsTree->GetPostOrder(poNum); bool isEntry = BitVecOps::IsMember(&m_traits, m_entries, poNum); - JITDUMP("%s" FMT_BB "%s", first ? "" : ", ", block->bbNum, isEntry ? "e" : ""); - first = false; + JITDUMP(FMT_BB "%s", block->bbNum, isEntry ? "e" : ""); } - JITDUMP("\n"); + + for (SCC* child : m_nested) + { + child->Dump(comp, indent + 3); + } + } + + void FindNested() + { + ArrayStack nestedSccs(m_comp->getAllocator(CMK_DepthFirstSearch)); + BitVec nestedBlocks = InternalBlocks(); + m_comp->optFindSCCs(nestedBlocks, m_traits, nestedSccs); + + const int nNested = nestedSccs.Height(); + + if (nNested == 0) + { + return; + } + + for (int i = 0; i < nNested; i++) + { + SCC* const nestedScc = nestedSccs.Bottom(i); + m_nested.push_back(nestedScc); + } } }; @@ -2957,7 +3019,8 @@ void Compiler::optFindSCCs() ArrayStack sccs(getAllocator(CMK_DepthFirstSearch)); optFindSCCs(allBlocks, traits, sccs); - unsigned numIrreducible = 0; + unsigned numIrreducible = 0; + unsigned numNestedIrreducible = 0; if (sccs.Height() > 0) { @@ -2967,7 +3030,8 @@ void Compiler::optFindSCCs() { SCC* const scc = sccs.Bottom(i); scc->Dump(this); - numIrreducible += (scc->NumEntries() > 1); + + numIrreducible += scc->NumIrr(); } } else @@ -2977,7 +3041,7 @@ void Compiler::optFindSCCs() if (numIrreducible > 0) { - JITDUMP("\n*** %u Irreducible!\n", numIrreducible); + JITDUMP("\n*** %u total Irreducible!\n", numIrreducible); } Metrics.IrreducibleLoopsFoundDuringOpts = (int)numIrreducible; From d0129139653c7271561eeac453c43311bd611116 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 26 Sep 2025 15:58:36 -0700 Subject: [PATCH 03/13] add note --- src/coreclr/jit/optimizer.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 98dca1e0e0bd18..fdfdda494f68ce 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3059,6 +3059,23 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack void { // Ignore blocks not in the subset // + // This might be too restrictive. Consider + // + // X -> A; X -> B; + // A -> B; A -> C; + // B -> A; B -> B; + // + // We find the A,B SCC. Its non-header subset is empty. + // + // Thus we fail to find the B self loop "nested" inside. 
+ // + // Might need to be: "ignore in-edges from outside the set, or from + // non-dominated edges in the set...?" So we'd ignore A->B and B->A, + // but not B->B. + // + // However I think we still find all nested SCCs, since those cannot + // share a header with the outer SCC? + // if (!BitVecOps::IsMember(&traits, subset, u->bbPostorderNum)) { return; From 958c0b7f6f35511d9bc80e29fd87ce918f4f86af Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 13 Oct 2025 16:13:52 -0700 Subject: [PATCH 04/13] only find SCCs when DFS/Loops flags improper headers --- src/coreclr/jit/optimizer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index fdfdda494f68ce..389ed7739f5598 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2367,7 +2367,13 @@ void Compiler::optFindLoops() fgMightHaveNaturalLoops = m_dfsTree->HasCycle(); assert(fgMightHaveNaturalLoops || (m_loops->NumLoops() == 0)); - optFindSCCs(); + // If we saw anything unusual when finding loops, + // find the SCCs. + if (m_loops->ImproperLoopHeaders() > 0) + { + JITDUMP("Improper headers detected, finding SCCs\n"); + optFindSCCs(); + } } //----------------------------------------------------------------------------- From 85c77eca610eedbd2fe4b3d2dfa922d525194159 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 15 Oct 2025 16:12:59 -0700 Subject: [PATCH 05/13] naive transform --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 2 + src/coreclr/jit/optimizer.cpp | 184 ++++++++++++++++++++++++++++-- 3 files changed, 175 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 607e93dc42a506..b7e275e9d59381 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7025,7 +7025,7 @@ class Compiler void optFindLoops(); bool optCanonicalizeLoops(); - void optFindSCCs(); + bool optFindSCCs(); void optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs); void optCompactLoops(); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 154054d8453041..21c178af7f89e3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -98,6 +98,8 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, CONFIG_INTEGER(JitUnrollLoopsWithEH, "JitUnrollLoopsWithEH", 0) // If 0, don't unroll loops containing EH regions +RELEASE_CONFIG_INTEGER(JitRemoveIrreducibleLoops, "JitRemoveIrreducibleLoops", 0) // 0: leave as is; 1: switch dispatch + CONFIG_INTEGER(JitDirectAlloc, "JitDirectAlloc", 0) CONFIG_INTEGER(JitDoubleAlign, "JitDoubleAlign", 1) CONFIG_INTEGER(JitEmitPrintRefRegs, "JitEmitPrintRefRegs", 0) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 389ed7739f5598..92eecadf70b7cd 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2345,6 +2345,23 @@ void Compiler::optFindLoops() { m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + // If we saw anything unusual when finding loops, + // find the SCCs. 
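+    // (A nonzero improper header count means some cycle entry could not
+    // be verified as a natural loop header -- the signal that irreducible
+    // flow may be present.)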
+ if (m_loops->ImproperLoopHeaders() > 0) + { + JITDUMP("Improper headers detected, finding SCCs\n"); + if (optFindSCCs()) + { + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + // not guaranteed as this counts "non canonical loops" too + // + assert(m_loops->ImproperLoopHeaders() == 0); + } + } + optCompactLoops(); if (optCanonicalizeLoops()) @@ -2366,14 +2383,6 @@ void Compiler::optFindLoops() // loops by removing edges. fgMightHaveNaturalLoops = m_dfsTree->HasCycle(); assert(fgMightHaveNaturalLoops || (m_loops->NumLoops() == 0)); - - // If we saw anything unusual when finding loops, - // find the SCCs. - if (m_loops->ImproperLoopHeaders() > 0) - { - JITDUMP("Improper headers detected, finding SCCs\n"); - optFindSCCs(); - } } //----------------------------------------------------------------------------- @@ -2876,6 +2885,7 @@ class SCC BitVec m_entries; jitstd::vector m_nested; unsigned m_numIrr; + weight_t m_entryWeight; public: @@ -2888,6 +2898,7 @@ class SCC , m_entries(BitVecOps::UninitVal()) , m_nested(comp->getAllocator(CMK_DepthFirstSearch)) , m_numIrr(0) + , m_entryWeight(0) { m_blocks = BitVecOps::MakeEmpty(&m_traits); m_entries = BitVecOps::MakeEmpty(&m_traits); @@ -2918,6 +2929,7 @@ class SCC if (!BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) { BitVecOps::AddElemD(&m_traits, m_entries, block->bbPostorderNum); + m_entryWeight += block->bbWeight; break; } } @@ -2956,7 +2968,14 @@ class SCC return m_numIrr; } - void Dump(Compiler* comp, int indent = 0) + // Weight of all the flow into entry blocks + // + weight_t TotalEntryWeight() + { + return m_entryWeight; + } + + void Dump(int indent = 0) { BitVecOps::Iter iterator(&m_traits, m_blocks); unsigned int poNum; @@ -2987,10 +3006,15 @@ class SCC JITDUMP(FMT_BB "%s", block->bbNum, isEntry ? "e" : ""); } JITDUMP("\n"); + } + + void DumpAll(int indent = 0) + { + Dump(indent); for (SCC* child : m_nested) { - child->Dump(comp, indent + 3); + child->DumpAll(indent + 3); } } @@ -3013,11 +3037,124 @@ class SCC m_nested.push_back(nestedScc); } } + + //----------------------------------------------------------------------------- + // TransformViaSwichDispatch: modify SCC into a reducible loop + // + // Notes: + // + // A multi-entry SCC is modified as follows: + // * each SCC header H is given an integer index H_i + // * a new BBJ_SWITCH header block J is created + // * a new control local K is allocated. + // * each flow edge that targets one of the H is split + // * In the split block K is assigned the value i + // * Flow from the split block is modified to flow to J + // * The switch in J transfers control to the headers H based on K + // + // Optionally: if the source of an edge to a header is dominated by that header, + // the edge can be left as is. 
(requires dominators) + // + // (validate: all headers in same EH region, none can be a region entry) + // + bool TransformViaSwitchDispatch() + { + bool modified = false; + const unsigned numHeaders = NumEntries(); + + if (numHeaders > 1) + { + JITDUMP("Transforming SCC via switch dispatch: "); + JITDUMPEXEC(Dump()); + + modified = true; + const unsigned controlVarNum = m_comp->lvaGrabTemp(/* shortLifetime */ false DEBUGARG("SCC control var")); + LclVarDsc* const controlVarDsc = m_comp->lvaGetDesc(controlVarNum); + controlVarDsc->lvType = TYP_INT; + BasicBlock* dispatcher = nullptr; + FlowEdge** const cases = new (m_comp, CMK_FlowEdge) FlowEdge*[numHeaders]; + + // Split edges, rewire flow, and add control var assignments + // + int headerNumber = 0; + BitVecOps::Iter iterator(&m_traits, m_entries); + unsigned int poHeaderNumber = 0; + + while (iterator.NextElem(&poHeaderNumber)) + { + BasicBlock* const header = m_comp->m_dfsTree->GetPostOrder(poHeaderNumber); + if (dispatcher == nullptr) + { + dispatcher = m_comp->fgNewBBbefore(BBJ_SWITCH, header, true); + } + + weight_t incomingWeight = 0; + + for (FlowEdge* const f : header->PredEdgesEditing()) + { + assert(f->getDestinationBlock() == header); + JITDUMP(" Splitting edge " FMT_BB " -> " FMT_BB "\n", f->getSourceBlock()->bbNum, header->bbNum); + BasicBlock* const transferBlock = m_comp->fgSplitEdge(f->getSourceBlock(), header); + GenTree* const targetIndex = m_comp->gtNewIconNode(headerNumber); + GenTree* const storeControlVar = m_comp->gtNewStoreLclVarNode(controlVarNum, targetIndex); + + incomingWeight += f->getLikelyWeight(); + + m_comp->fgNewStmtAtBeg(transferBlock, storeControlVar); + m_comp->fgReplaceJumpTarget(transferBlock, header, dispatcher); + } + + FlowEdge* const dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); + + // Since all flow to header now goes through dispatch, we know the likelihood + // of the dispatch targets, unless all profile data is zero. + // + if (TotalEntryWeight() > 0) + { + dispatchToHeaderEdge->setLikelihood(incomingWeight / TotalEntryWeight()); + } + else + { + dispatchToHeaderEdge->setLikelihood(1.0 / numHeaders); + } + + cases[headerNumber] = dispatchToHeaderEdge; + headerNumber++; + } + + // Create the dispatch switch... 
all cases unique, no default + // + JITDUMP("Dispatch header is " FMT_BB "; %u cases\n", dispatcher->bbNum, numHeaders); + BBswtDesc* const swtDesc = + new (m_comp, CMK_BasicBlock) BBswtDesc(cases, numHeaders, cases, numHeaders, /* hasDefault */ false); + dispatcher->SetSwitch(swtDesc); + + GenTree* const controlVar = m_comp->gtNewLclvNode(controlVarNum, TYP_INT); + GenTree* const switchNode = m_comp->gtNewOperNode(GT_SWITCH, TYP_VOID, controlVar); + + m_comp->fgNewStmtAtEnd(dispatcher, switchNode); + } + + // Handle nested SCCs + // + for (SCC* const nested : m_nested) + { + modified |= nested->TransformViaSwitchDispatch(); + } + + return modified; + } }; typedef JitHashTable, SCC*> SCCMap; -void Compiler::optFindSCCs() +//----------------------------------------------------------------------------- +// optFindSCCs: find strongly connected components in the flow graph +// +// Returns: +// true if the flow graph was modified +// +bool Compiler::optFindSCCs() { BitVecTraits traits(m_dfsTree->GetPostOrderCount(), this); BitVec allBlocks = BitVecOps::MakeFull(&traits); @@ -3035,7 +3172,7 @@ void Compiler::optFindSCCs() for (int i = 0; i < sccs.Height(); i++) { SCC* const scc = sccs.Bottom(i); - scc->Dump(this); + scc->DumpAll(); numIrreducible += scc->NumIrr(); } @@ -3051,8 +3188,31 @@ void Compiler::optFindSCCs() } Metrics.IrreducibleLoopsFoundDuringOpts = (int)numIrreducible; + + // Optionally try and remove them + // + bool modified = false; + + if (JitConfig.JitRemoveIrreducibleLoops() == 1) + { + for (int i = 0; i < sccs.Height(); i++) + { + SCC* const scc = sccs.Bottom(i); + modified |= scc->TransformViaSwitchDispatch(); + } + } + + return modified; } +//----------------------------------------------------------------------------- +// optFindSCCs: find strongly connected components in a subgraph +// +// Arguments: +// subset - bv describing the subgraph +// traits - bv traits +// sccs - [out] collection of SCCs found in this subgraph +// void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs) { // Initially we map a block to a null entry in the map. 
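+
+// For example (sketch): for an SCC with two headers H1 and H2, reached by
+// edges P1 -> H1 and P2 -> H2, the switch dispatch transform above produces
+//
+//   P1:   V = 0; goto DISP;           // split block of P1 -> H1
+//   P2:   V = 1; goto DISP;           // split block of P2 -> H2
+//   DISP: switch (V) { 0: goto H1; 1: goto H2; }
+//
+// where V is the new control local. DISP is then the sole entry to the
+// cycle, and the loop becomes reducible.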
From e6cead2c5292d2d038c817124b8f443bf7dafcf4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 15 Oct 2025 18:57:39 -0700 Subject: [PATCH 06/13] fix switch table; handle finallys better --- src/coreclr/jit/jitconfigvalues.h | 3 ++- src/coreclr/jit/optimizer.cpp | 45 ++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 21c178af7f89e3..d0ddd074b89b9b 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -98,7 +98,8 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, CONFIG_INTEGER(JitUnrollLoopsWithEH, "JitUnrollLoopsWithEH", 0) // If 0, don't unroll loops containing EH regions -RELEASE_CONFIG_INTEGER(JitRemoveIrreducibleLoops, "JitRemoveIrreducibleLoops", 0) // 0: leave as is; 1: switch dispatch +RELEASE_CONFIG_INTEGER(JitTransformIrreducibleLoops, "JitTransformIrreducibleLoops", 1) // 0: leave as is; 1: switch + // dispatch CONFIG_INTEGER(JitDirectAlloc, "JitDirectAlloc", 0) CONFIG_INTEGER(JitDoubleAlign, "JitDoubleAlign", 1) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 92eecadf70b7cd..769abad14395b1 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2924,6 +2924,13 @@ class SCC { BasicBlock* const block = m_comp->m_dfsTree->GetPostOrder(poNum); + // cfpt's cannot be scc entries + // + if (block->isBBCallFinallyPairTail()) + { + continue; + } + for (BasicBlock* pred : block->PredBlocks()) { if (!BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) @@ -3072,6 +3079,7 @@ class SCC LclVarDsc* const controlVarDsc = m_comp->lvaGetDesc(controlVarNum); controlVarDsc->lvType = TYP_INT; BasicBlock* dispatcher = nullptr; + FlowEdge** const succs = new (m_comp, CMK_FlowEdge) FlowEdge*[numHeaders]; FlowEdge** const cases = new (m_comp, CMK_FlowEdge) FlowEdge*[numHeaders]; // Split edges, rewire flow, and add control var assignments @@ -3086,6 +3094,7 @@ class SCC if (dispatcher == nullptr) { dispatcher = m_comp->fgNewBBbefore(BBJ_SWITCH, header, true); + dispatcher->setBBProfileWeight(TotalEntryWeight()); } weight_t incomingWeight = 0; @@ -3118,15 +3127,17 @@ class SCC dispatchToHeaderEdge->setLikelihood(1.0 / numHeaders); } + succs[headerNumber] = dispatchToHeaderEdge; cases[headerNumber] = dispatchToHeaderEdge; + headerNumber++; } - // Create the dispatch switch... all cases unique, no default + // Create the dispatch switch... all cases unique. // JITDUMP("Dispatch header is " FMT_BB "; %u cases\n", dispatcher->bbNum, numHeaders); BBswtDesc* const swtDesc = - new (m_comp, CMK_BasicBlock) BBswtDesc(cases, numHeaders, cases, numHeaders, /* hasDefault */ false); + new (m_comp, CMK_BasicBlock) BBswtDesc(succs, numHeaders, cases, numHeaders, /* hasDefault */ true); dispatcher->SetSwitch(swtDesc); GenTree* const controlVar = m_comp->gtNewLclvNode(controlVarNum, TYP_INT); @@ -3193,7 +3204,7 @@ bool Compiler::optFindSCCs() // bool modified = false; - if (JitConfig.JitRemoveIrreducibleLoops() == 1) + if (JitConfig.JitTransformIrreducibleLoops() == 1) { for (int i = 0; i < sccs.Height(); i++) { @@ -3279,17 +3290,27 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStackPredBlocks()) + // Do not walk back out of a handler + // + if (bbIsHandlerBeg(u)) { - if (p->KindIs(BBJ_CALLFINALLYRET)) - { - // Should we find the matching callfinally - // and walk back from there? Should just - // be our bbpred, right? 
- // - continue; - } + return; + } + // Do not walk back into a finally, + // instead skip to the call finally. + // + if (u->isBBCallFinallyPairTail()) + { + JITDUMP("cfpt " FMT_BB ", advancing to " FMT_BB "\n", u->bbNum, u->Prev()->bbNum); + self(self, u->Prev(), root, subset); + return; + } + + // Else walk preds... + // + for (BasicBlock* p : u->PredBlocks()) + { self(self, p, root, subset); } }; From b6eaa48ca8760bcdd444594c0d13e9d9cd6f21fb Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 16 Oct 2025 15:14:17 -0700 Subject: [PATCH 07/13] fixes --- src/coreclr/jit/fgbasic.cpp | 11 +++++++---- src/coreclr/jit/optimizer.cpp | 33 ++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 0f7af82a794971..7fc7f190a4b27b 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4909,7 +4909,7 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr) // Returns a new block, that is a successor of 'curr' and which branches unconditionally to 'succ' // // Assumptions: -// 'curr' must have a bbKind of BBJ_COND, BBJ_ALWAYS, or BBJ_SWITCH +// 'curr' must have a bbKind of BBJ_COND, BBJ_ALWAYS, BBJ_SWITCH, BBJ_CALLFINALLYRET, or BBJ_EHCATCHRET // // Notes: // The returned block is empty. @@ -4917,10 +4917,13 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr) BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ) { - assert(curr->KindIs(BBJ_COND, BBJ_SWITCH, BBJ_ALWAYS)); + assert(curr->KindIs(BBJ_COND, BBJ_SWITCH, BBJ_ALWAYS, BBJ_CALLFINALLYRET, BBJ_EHCATCHRET)); assert(fgPredsComputed); assert(fgGetPredForBlock(succ, curr) != nullptr); + // For EHCATCHRET put the split block in succ's EH region + BasicBlock* const cursor = curr->KindIs(BBJ_EHCATCHRET) ? succ : curr; + BasicBlock* newBlock; if (curr->NextIs(succ)) { @@ -4928,12 +4931,12 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ) // an immediately following block of a BBJ_SWITCH (which has // no fall-through path). For this case, simply insert a new // fall-through block after 'curr'. 
- newBlock = fgNewBBafter(BBJ_ALWAYS, curr, true /* extendRegion */); + newBlock = fgNewBBafter(BBJ_ALWAYS, cursor, true /* extendRegion */); } else { // The new block always jumps to 'succ' - newBlock = fgNewBBinRegion(BBJ_ALWAYS, curr, /* isRunRarely */ curr->isRunRarely()); + newBlock = fgNewBBinRegion(BBJ_ALWAYS, cursor, /* isRunRarely */ curr->isRunRarely()); } newBlock->CopyFlags(curr, succ->GetFlagsRaw() & BBF_BACKWARD_JUMP); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 769abad14395b1..63af899b434324 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3084,9 +3084,10 @@ class SCC // Split edges, rewire flow, and add control var assignments // - int headerNumber = 0; + unsigned headerNumber = 0; BitVecOps::Iter iterator(&m_traits, m_entries); unsigned int poHeaderNumber = 0; + weight_t netLikelihood = 0.0; while (iterator.NextElem(&poHeaderNumber)) { @@ -3097,18 +3098,17 @@ class SCC dispatcher->setBBProfileWeight(TotalEntryWeight()); } - weight_t incomingWeight = 0; + weight_t headerWeight = header->bbWeight; for (FlowEdge* const f : header->PredEdgesEditing()) { assert(f->getDestinationBlock() == header); - JITDUMP(" Splitting edge " FMT_BB " -> " FMT_BB "\n", f->getSourceBlock()->bbNum, header->bbNum); - BasicBlock* const transferBlock = m_comp->fgSplitEdge(f->getSourceBlock(), header); + BasicBlock* const pred = f->getSourceBlock(); + JITDUMP(" Splitting edge " FMT_BB " -> " FMT_BB "\n", pred->bbNum, header->bbNum); + BasicBlock* const transferBlock = m_comp->fgSplitEdge(pred, header); GenTree* const targetIndex = m_comp->gtNewIconNode(headerNumber); GenTree* const storeControlVar = m_comp->gtNewStoreLclVarNode(controlVarNum, targetIndex); - incomingWeight += f->getLikelyWeight(); - m_comp->fgNewStmtAtBeg(transferBlock, storeControlVar); m_comp->fgReplaceJumpTarget(transferBlock, header, dispatcher); } @@ -3116,17 +3116,23 @@ class SCC FlowEdge* const dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); // Since all flow to header now goes through dispatch, we know the likelihood - // of the dispatch targets, unless all profile data is zero. + // of the dispatch targets. If all profile data is zero just divide evenly. // - if (TotalEntryWeight() > 0) + if ((headerNumber + 1) == numHeaders) + { + dispatchToHeaderEdge->setLikelihood(1.0 - netLikelihood); + } + else if (TotalEntryWeight() > 0) { - dispatchToHeaderEdge->setLikelihood(incomingWeight / TotalEntryWeight()); + dispatchToHeaderEdge->setLikelihood(headerWeight / TotalEntryWeight()); } else { dispatchToHeaderEdge->setLikelihood(1.0 / numHeaders); } + netLikelihood += dispatchToHeaderEdge->getLikelihood(); + succs[headerNumber] = dispatchToHeaderEdge; cases[headerNumber] = dispatchToHeaderEdge; @@ -3294,6 +3300,7 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStackPredBlocks()) { + // Do not walk back into a catch or filter. + // + if (p->KindIs(BBJ_EHCATCHRET)) + { + JITDUMP("c-cont " FMT_BB ", skipping " FMT_BB "\n", u->bbNum, u->Prev()->bbNum); + continue; + } + self(self, p, root, subset); } }; From 5f0da75f5d85a8f21f219add080458e8d7ab5b54 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 16 Oct 2025 19:38:53 -0700 Subject: [PATCH 08/13] handle some of the complications caused by EH... 
OSR still needs work --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/jiteh.cpp | 26 ++++++++++++++ src/coreclr/jit/optimizer.cpp | 68 +++++++++++++++++++++++++++++++++-- 3 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b7e275e9d59381..5d9fdfa5e6173e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2752,6 +2752,7 @@ class Compiler bool bbInHandlerRegions(unsigned regionIndex, BasicBlock* blk); bool bbInCatchHandlerRegions(BasicBlock* tryBlk, BasicBlock* hndBlk); unsigned short bbFindInnermostCommonTryRegion(BasicBlock* bbOne, BasicBlock* bbTwo); + unsigned short bbFindInnermostCommonTryRegion(unsigned tryIndex, BasicBlock* bbTwo); unsigned short bbFindInnermostTryRegionContainingHandlerRegion(unsigned handlerIndex); unsigned short bbFindInnermostHandlerRegionContainingTryRegion(unsigned tryIndex); diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 287cd28393d388..09d26c7152776d 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -615,6 +615,32 @@ unsigned short Compiler::bbFindInnermostCommonTryRegion(BasicBlock* bbOne, Basic return 0; } +/****************************************************************************************** + * Given a one-biased region index (which may be 0, indicating method region) and a block, + * return one-biased index for the inner-most enclosing try region that contains the block and the region. + * Return 0 if it does not find any try region (which means the inner-most region + * is the method itself). + */ + +unsigned short Compiler::bbFindInnermostCommonTryRegion(unsigned index, BasicBlock* bbTwo) +{ + assert(index <= compHndBBtabCount); + + if (index == 0) + return 0; + + for (unsigned XTnum = index - 1; XTnum < compHndBBtabCount; XTnum++) + { + if (bbInTryRegions(XTnum, bbTwo)) + { + noway_assert(XTnum < MAX_XCPTN_INDEX); + return (unsigned short)(XTnum + 1); // Return the tryIndex + } + } + + return 0; +} + // bbIsTryBeg() returns true if this block is the start of any try region. // This is computed by examining the current values in the // EH table rather than just looking at the block's bbFlags. 
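+
+// (Both overloads of bbFindInnermostCommonTryRegion rely on the EH table
+// invariant that nested try regions appear before the regions enclosing
+// them, so the first containing region the scan reaches is the innermost.)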
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 63af899b434324..08dacf6fd02626 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2885,7 +2885,13 @@ class SCC BitVec m_entries; jitstd::vector m_nested; unsigned m_numIrr; - weight_t m_entryWeight; + + // lowest common ancestor try index + 1, or 0 if method region + unsigned m_enclosingTryIndex; + // lowest common ancestor handler index + 1, or 0 if method region + unsigned m_enclosingHndIndex; + + weight_t m_entryWeight; public: @@ -2898,6 +2904,8 @@ class SCC , m_entries(BitVecOps::UninitVal()) , m_nested(comp->getAllocator(CMK_DepthFirstSearch)) , m_numIrr(0) + , m_enclosingTryIndex(0) + , m_enclosingHndIndex(0) , m_entryWeight(0) { m_blocks = BitVecOps::MakeEmpty(&m_traits); @@ -2920,6 +2928,8 @@ class SCC { BitVecOps::Iter iterator(&m_traits, m_blocks); unsigned int poNum; + bool isFirstEntry = true; + while (iterator.NextElem(&poNum)) { BasicBlock* const block = m_comp->m_dfsTree->GetPostOrder(poNum); @@ -2937,7 +2947,21 @@ class SCC { BitVecOps::AddElemD(&m_traits, m_entries, block->bbPostorderNum); m_entryWeight += block->bbWeight; - break; + + if (isFirstEntry) + { + m_enclosingTryIndex = block->bbTryIndex; + m_enclosingHndIndex = block->bbHndIndex; + isFirstEntry = false; + } + else + { + // We expect all SCC headers to be in the same handler region + assert(m_enclosingHndIndex == block->bbHndIndex); + + // But possibly different try regions + m_enclosingTryIndex = m_comp->bbFindInnermostCommonTryRegion(m_enclosingTryIndex, block); + } } } } @@ -3045,6 +3069,16 @@ class SCC } } + unsigned EnclosingTryIndex() + { + return m_enclosingTryIndex; + } + + unsigned EnclosingHndIndex() + { + return m_enclosingHndIndex; + } + //----------------------------------------------------------------------------- // TransformViaSwichDispatch: modify SCC into a reducible loop // @@ -3094,7 +3128,25 @@ class SCC BasicBlock* const header = m_comp->m_dfsTree->GetPostOrder(poHeaderNumber); if (dispatcher == nullptr) { - dispatcher = m_comp->fgNewBBbefore(BBJ_SWITCH, header, true); + if ((EnclosingTryIndex() > 0) || (EnclosingHndIndex() > 0)) + { + const bool inTry = (EnclosingTryIndex() != 0) && (EnclosingHndIndex() == 0) || + (EnclosingTryIndex() < EnclosingHndIndex()); + if (inTry) + { + JITDUMP("Dispatch header needs to go in try of EH#%02u ...\n", EnclosingTryIndex() - 1); + } + else + { + JITDUMP("Dispatch header needs to go in handler of EH#%02u ...\n", EnclosingHndIndex() - 1); + } + } + else + { + JITDUMP("Dispatch header needs to go in method region\n"); + } + dispatcher = m_comp->fgNewBBinRegion(BBJ_SWITCH, EnclosingTryIndex(), EnclosingHndIndex(), + /* nearBlk */ nullptr); dispatcher->setBBProfileWeight(TotalEntryWeight()); } @@ -3113,6 +3165,16 @@ class SCC m_comp->fgReplaceJumpTarget(transferBlock, header, dispatcher); } + // If the header is the OSR entry, and the dispatcher is in an enclosing + // try, we must branch there in a more complicated way... + // + // something like: create a new block in the same region as the dispatcher. + // In that block set the OSR control var for the OSR entry (0), and set the + // control vars for each enclosing try that is an SCC header (already handled) + // and then branch to the outermost enclosing try's header. 
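+            // For now, the assert below checks that the problematic
+            // combination -- an SCC header inside a try region that is
+            // also the OSR entry -- does not actually arise.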
+ // + assert(!header->hasTryIndex() || (header != m_comp->fgOSREntryBB)); + FlowEdge* const dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); // Since all flow to header now goes through dispatch, we know the likelihood From 0f019f21b32ad7373eb84618fbedd438005e795b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 21 Oct 2025 18:12:24 -0700 Subject: [PATCH 09/13] need to redo RPO for subgraphs --- src/coreclr/jit/compiler.h | 7 +- src/coreclr/jit/compiler.hpp | 112 +++++++++++++++++++++++++++ src/coreclr/jit/optimizer.cpp | 142 ++++++++++++++++++++++++++++------ 3 files changed, 234 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5d9fdfa5e6173e..786b74116f37cd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6241,6 +6241,9 @@ class Compiler template unsigned fgRunDfs(VisitPreorder assignPreorder, VisitPostorder assignPostorder, VisitEdge visitEdge); + template + unsigned fgRunSubgraphDfs(VisitPreorder assignPreorder, VisitPostorder assignPostorder, VisitEdge visitEdge, BitVec& subgraph, BitVecTraits& subgraphTraits); + template FlowGraphDfsTree* fgComputeDfs(); void fgInvalidateDfsTree(); @@ -7026,8 +7029,8 @@ class Compiler void optFindLoops(); bool optCanonicalizeLoops(); - bool optFindSCCs(); - void optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs); + bool optFindSCCs(bool& failedToModify); + void optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs, BasicBlock** postorder, unsigned postorderCount); void optCompactLoops(); void optCompactLoop(FlowGraphNaturalLoop* loop); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 12280cde617228..79d85dfc8d40d3 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5197,6 +5197,118 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos return preOrderIndex; } +//------------------------------------------------------------------------ +// fgRunSubgraphDfs: Run DFS over a subgraph of the flow graph. +// +// Type parameters: +// VisitPreorder - Functor type that takes a BasicBlock* and its preorder number +// VisitPostorder - Functor type that takes a BasicBlock* and its postorder number +// VisitEdge - Functor type that takes two BasicBlock*. +// useProfile - If true, determines order of successors visited using profile data +// +// Parameters: +// visitPreorder - Functor to visit block in its preorder +// visitPostorder - Functor to visit block in its postorder +// visitEdge - Functor to visit an edge. Called after visitPreorder (if +// this is the first time the successor is seen). +// subgraphBlocks - bitvector (in postorder num space) identifying the subgraph +// subgraphTraits - traits for the subgraphBlocks +// +// Returns: +// Number of blocks visited. +// +// Notes: +// Assumes m_dfs is valid (in particular, uses block post order numbers) +// +template +unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder, + VisitPostorder visitPostorder, + VisitEdge visitEdge, + BitVec& subgraph, + BitVecTraits& subgraphTraits) +{ + // just reuse the existing traits here, and index by bbPostorderNum? 
+ JITDUMP("Running subgraph DFS on %u blocks\n", BitVecOps::Count(&subgraphTraits, subgraph)); + BitVecTraits traits(fgBBNumMax + 1, this); + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + unsigned preOrderIndex = 0; + unsigned postOrderIndex = 0; + + ArrayStack blocks(getAllocator(CMK_DepthFirstSearch)); + + auto dfsFrom = [&](BasicBlock* firstBB) { + BitVecOps::AddElemD(&traits, visited, firstBB->bbNum); + blocks.Emplace(this, firstBB, useProfile); + JITDUMP(" visiting " FMT_BB "\n", firstBB->bbNum); + visitPreorder(firstBB, preOrderIndex++); + + while (!blocks.Empty()) + { + BasicBlock* block = blocks.TopRef().Block(); + BasicBlock* succ = blocks.TopRef().NextSuccessor(); + + if (succ != nullptr) + { + if (BitVecOps::IsMember(&subgraphTraits, subgraph, succ->bbPostorderNum)) + { + if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum)) + { + JITDUMP(" visiting " FMT_BB "\n", succ->bbNum); + blocks.Emplace(this, succ, useProfile); + visitPreorder(succ, preOrderIndex++); + } + + visitEdge(block, succ); + } + } + else + { + blocks.Pop(); + visitPostorder(block, postOrderIndex++); + } + } + }; + + // Find the subgraph entry blocks (blocks that have no pred, or a pred not in the subgraph). + // + ArrayStack entries(getAllocator(CMK_DepthFirstSearch)); + + unsigned poNum = 0; + BitVecOps::Iter iterator(&subgraphTraits, subgraph); + while (iterator.NextElem(&poNum)) + { + BasicBlock* const block = m_dfsTree->GetPostOrder(poNum); + bool hasPred = false; + for (BasicBlock* const pred : block->PredBlocks()) + { + hasPred = true; + if (!BitVecOps::IsMember(&subgraphTraits, subgraph, pred->bbPostorderNum)) + { + JITDUMP(FMT_BB " is subgraph entry\n", block->bbNum); + entries.Emplace(block); + } + } + + if (!hasPred) + { + JITDUMP(FMT_BB " is an isolated subgraph entry\n", block->bbNum); + entries.Emplace(block); + } + } + + // Kick off a DFS from each entry + // + while (entries.Height() > 0) + { + BasicBlock* const block = entries.Pop(); + dfsFrom(block); + } + + assert(preOrderIndex == postOrderIndex); + return preOrderIndex; +} + //------------------------------------------------------------------------ // fgVisitBlocksInLoopAwareRPO: Visit the blocks in 'dfsTree' in reverse post-order, // but ensure loop bodies are visited before loop successors. diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 08dacf6fd02626..aa17b92a168332 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2350,16 +2350,17 @@ void Compiler::optFindLoops() if (m_loops->ImproperLoopHeaders() > 0) { JITDUMP("Improper headers detected, finding SCCs\n"); - if (optFindSCCs()) + bool failedToModifyAll = false; + if (optFindSCCs(failedToModifyAll)) { - fgInvalidateDfsTree(); - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - - // not guaranteed as this counts "non canonical loops" too - // - assert(m_loops->ImproperLoopHeaders() == 0); + JITDUMP("SCC modification %ssucceeded, finding natural loops\n", failedToModifyAll ? 
"partially " : ""); } + + fgInvalidateDfsTree(); + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + assert(failedToModifyAll || (m_loops->ImproperLoopHeaders() == 0)); } optCompactLoops(); @@ -2890,8 +2891,11 @@ class SCC unsigned m_enclosingTryIndex; // lowest common ancestor handler index + 1, or 0 if method region unsigned m_enclosingHndIndex; - + // total weight of all entry blocks weight_t m_entryWeight; + // true if one of the SCC entries is the OSR entry block, and it's within a try region + // transforming these is more complex and is only needed when jitting. + bool m_hasOSREntryHeaderInTry; public: @@ -2907,6 +2911,7 @@ class SCC , m_enclosingTryIndex(0) , m_enclosingHndIndex(0) , m_entryWeight(0) + , m_hasOSREntryHeaderInTry(false) { m_blocks = BitVecOps::MakeEmpty(&m_traits); m_entries = BitVecOps::MakeEmpty(&m_traits); @@ -2926,6 +2931,8 @@ class SCC void ComputeEntries() { + JITDUMP("SCC has %u blocks\n", BitVecOps::Count(&m_traits, m_blocks)); + BitVecOps::Iter iterator(&m_traits, m_blocks); unsigned int poNum; bool isFirstEntry = true; @@ -2945,6 +2952,8 @@ class SCC { if (!BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) { + JITDUMP(FMT_BB " is scc entry\n", block->bbNum); + BitVecOps::AddElemD(&m_traits, m_entries, block->bbPostorderNum); m_entryWeight += block->bbWeight; @@ -2962,6 +2971,11 @@ class SCC // But possibly different try regions m_enclosingTryIndex = m_comp->bbFindInnermostCommonTryRegion(m_enclosingTryIndex, block); } + + if ((block == m_comp->fgOSREntryBB) && block->hasTryIndex()) + { + m_hasOSREntryHeaderInTry = true; + } } } } @@ -2987,6 +3001,11 @@ class SCC return NumEntries() > 1; } + bool HasOSREntryHeaderInTry() + { + return m_hasOSREntryHeaderInTry; + } + unsigned NumIrr() { m_numIrr = IsIrr(); @@ -3053,7 +3072,48 @@ class SCC { ArrayStack nestedSccs(m_comp->getAllocator(CMK_DepthFirstSearch)); BitVec nestedBlocks = InternalBlocks(); - m_comp->optFindSCCs(nestedBlocks, m_traits, nestedSccs); + unsigned nestedCount = BitVecOps::Count(&m_traits, nestedBlocks); + + if (nestedCount == 0) // < 2...? + { + return; + } + + JITDUMP("\n --> nested %u blocks... \n", BitVecOps::Count(&m_traits, nestedBlocks)); + + // Build a new postorder for the nested blocks + // + BasicBlock** postOrder = new (m_comp, CMK_DepthFirstSearch) BasicBlock*[nestedCount]; + + auto visitPreorder = [](BasicBlock* block, unsigned preorderNum) {}; + + auto visitPostorder = [&postOrder](BasicBlock* block, unsigned postorderNum) { + postOrder[postorderNum] = block; + }; + + auto visitEdge = [](BasicBlock* block, BasicBlock* succ) {}; + + unsigned numBlocks = + m_comp->fgRunSubgraphDfs(visitPreorder, visitPostorder, visitEdge, nestedBlocks, + m_traits); + + if (numBlocks != nestedCount) + { + JITDUMP("Eh? numBlocks %u nestedCount %u\n", numBlocks, nestedCount); + } + // assert(numBlocks == nestedCount); + + JITDUMP("[dfs done, postorder is]\n"); + for (unsigned i = 0; i < nestedCount; i++) + { + JITDUMP(FMT_BB " ", postOrder[i]->bbNum); + } + JITDUMP("\n"); + + // Use that to find the nested SCCs + // + m_comp->optFindSCCs(nestedBlocks, m_traits, nestedSccs, postOrder, nestedCount); const int nNested = nestedSccs.Height(); @@ -3067,6 +3127,8 @@ class SCC SCC* const nestedScc = nestedSccs.Bottom(i); m_nested.push_back(nestedScc); } + + JITDUMP("\n <-- nested ... 
\n"); } unsigned EnclosingTryIndex() @@ -3082,6 +3144,8 @@ class SCC //----------------------------------------------------------------------------- // TransformViaSwichDispatch: modify SCC into a reducible loop // + // Arguments: + // failedToModify - set to true if the transformation could not be completed // Notes: // // A multi-entry SCC is modified as follows: @@ -3098,7 +3162,7 @@ class SCC // // (validate: all headers in same EH region, none can be a region entry) // - bool TransformViaSwitchDispatch() + bool TransformViaSwitchDispatch(bool& failedToModify) { bool modified = false; const unsigned numHeaders = NumEntries(); @@ -3108,6 +3172,14 @@ class SCC JITDUMP("Transforming SCC via switch dispatch: "); JITDUMPEXEC(Dump()); + if (HasOSREntryHeaderInTry()) + { + JITDUMP(" cannot transform: header " FMT_BB " is OSR entry in a try region\n", + m_comp->fgOSREntryBB->bbNum); + failedToModify = true; + return false; + } + modified = true; const unsigned controlVarNum = m_comp->lvaGrabTemp(/* shortLifetime */ false DEBUGARG("SCC control var")); LclVarDsc* const controlVarDsc = m_comp->lvaGetDesc(controlVarNum); @@ -3173,9 +3245,15 @@ class SCC // control vars for each enclosing try that is an SCC header (already handled) // and then branch to the outermost enclosing try's header. // - assert(!header->hasTryIndex() || (header != m_comp->fgOSREntryBB)); - - FlowEdge* const dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); + FlowEdge* dispatchToHeaderEdge = nullptr; + if ((header == m_comp->fgOSREntryBB) && (header->bbTryIndex != dispatcher->bbTryIndex)) + { + assert(!"Transforming SCC with OSR entry in try region not implemented"); + } + else + { + dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); + } // Since all flow to header now goes through dispatch, we know the likelihood // of the dispatch targets. If all profile data is zero just divide evenly. 
@@ -3218,7 +3296,7 @@ class SCC // for (SCC* const nested : m_nested) { - modified |= nested->TransformViaSwitchDispatch(); + modified |= nested->TransformViaSwitchDispatch(failedToModify); } return modified; @@ -3230,16 +3308,19 @@ typedef JitHashTable, SCC*> SCCMap; //----------------------------------------------------------------------------- // optFindSCCs: find strongly connected components in the flow graph // +// Arguments: +// failedToModify - [out] set to true if we failed to modify any irreducible SCCs +// // Returns: // true if the flow graph was modified // -bool Compiler::optFindSCCs() +bool Compiler::optFindSCCs(bool& failedToModify) { BitVecTraits traits(m_dfsTree->GetPostOrderCount(), this); BitVec allBlocks = BitVecOps::MakeFull(&traits); ArrayStack sccs(getAllocator(CMK_DepthFirstSearch)); - optFindSCCs(allBlocks, traits, sccs); + optFindSCCs(allBlocks, traits, sccs, m_dfsTree->GetPostOrder(), m_dfsTree->GetPostOrderCount()); unsigned numIrreducible = 0; unsigned numNestedIrreducible = 0; @@ -3277,7 +3358,7 @@ bool Compiler::optFindSCCs() for (int i = 0; i < sccs.Height(); i++) { SCC* const scc = sccs.Bottom(i); - modified |= scc->TransformViaSwitchDispatch(); + modified |= scc->TransformViaSwitchDispatch(failedToModify); } } @@ -3291,8 +3372,11 @@ bool Compiler::optFindSCCs() // subset - bv describing the subgraph // traits - bv traits // sccs - [out] collection of SCCs found in this subgraph +// postorder - array of BasicBlock* in postorder +// postorderCount - size of hte array // -void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStack& sccs) +void Compiler::optFindSCCs( + BitVec& subset, BitVecTraits& traits, ArrayStack& sccs, BasicBlock** postorder, unsigned postorderCount) { // Initially we map a block to a null entry in the map. // If we then get a second block in that SCC, we allocate an SCC instance. 
@@ -3349,6 +3433,7 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStackbbNum, root->bbNum); scc->Add(u); } @@ -3362,7 +3447,7 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStackKindIs(BBJ_EHCATCHRET)) { - JITDUMP("c-cont " FMT_BB ", skipping " FMT_BB "\n", u->bbNum, u->Prev()->bbNum); + // JITDUMP("c-cont " FMT_BB ", skipping " FMT_BB "\n", u->bbNum, u->Prev()->bbNum); continue; } @@ -3392,12 +3477,19 @@ void Compiler::optFindSCCs(BitVec& subset, BitVecTraits& traits, ArrayStackGetPostOrderCount(); i++) + for (unsigned i = 0; i < postorderCount; i++) { - unsigned rpoNum = m_dfsTree->GetPostOrderCount() - i - 1; - BasicBlock* const block = m_dfsTree->GetPostOrder(rpoNum); + unsigned const rpoNum = postorderCount - i - 1; + BasicBlock* const block = postorder[rpoNum]; + + if (!BitVecOps::IsMember(&traits, subset, block->bbPostorderNum)) + { + continue; + } + + // JITDUMP("Top-level assign: " FMT_BB "\n", block->bbNum); assign(assign, block, block, subset); } From 8af4efe1aaa19652d549bdff7a279714b9cbb3cd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 22 Oct 2025 07:39:22 -0700 Subject: [PATCH 10/13] fix profile asserts --- src/coreclr/jit/optimizer.cpp | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index aa17b92a168332..351e6d5b5961b7 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2950,11 +2950,16 @@ class SCC for (BasicBlock* pred : block->PredBlocks()) { - if (!BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) + if (BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum)) + { + // Pred is in the scc, so not an entry edge + continue; + } + + if (BitVecOps::TryAddElemD(&m_traits, m_entries, block->bbPostorderNum)) { JITDUMP(FMT_BB " is scc entry\n", block->bbNum); - BitVecOps::AddElemD(&m_traits, m_entries, block->bbPostorderNum); m_entryWeight += block->bbWeight; if (isFirstEntry) @@ -3172,13 +3177,13 @@ class SCC JITDUMP("Transforming SCC via switch dispatch: "); JITDUMPEXEC(Dump()); - if (HasOSREntryHeaderInTry()) - { - JITDUMP(" cannot transform: header " FMT_BB " is OSR entry in a try region\n", - m_comp->fgOSREntryBB->bbNum); - failedToModify = true; - return false; - } +// if (HasOSREntryHeaderInTry()) +// { +// JITDUMP(" cannot transform: header " FMT_BB " is OSR entry in a try region\n", +// m_comp->fgOSREntryBB->bbNum); +// failedToModify = true; +// return false; +// } modified = true; const unsigned controlVarNum = m_comp->lvaGrabTemp(/* shortLifetime */ false DEBUGARG("SCC control var")); @@ -3246,11 +3251,11 @@ class SCC // and then branch to the outermost enclosing try's header. 
// FlowEdge* dispatchToHeaderEdge = nullptr; - if ((header == m_comp->fgOSREntryBB) && (header->bbTryIndex != dispatcher->bbTryIndex)) - { - assert(!"Transforming SCC with OSR entry in try region not implemented"); - } - else +// if ((header == m_comp->fgOSREntryBB) && (header->bbTryIndex != dispatcher->bbTryIndex)) +// { +// assert(!"Transforming SCC with OSR entry in try region not implemented"); +// } +// else { dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher); } @@ -3260,7 +3265,7 @@ class SCC // if ((headerNumber + 1) == numHeaders) { - dispatchToHeaderEdge->setLikelihood(1.0 - netLikelihood); + dispatchToHeaderEdge->setLikelihood(max(0.0, (1.0 - netLikelihood))); } else if (TotalEntryWeight() > 0) { From 7055db62742cfbf9152e0f847b4adfadad822114 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 22 Oct 2025 11:12:11 -0700 Subject: [PATCH 11/13] subgraph entries may be reachable from one another; water down residual irr assert --- src/coreclr/jit/compiler.hpp | 8 ++++-- src/coreclr/jit/optimizer.cpp | 49 +++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 79d85dfc8d40d3..307d2312a3e52d 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5297,12 +5297,16 @@ unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder, } } - // Kick off a DFS from each entry + // Kick off a DFS from each unvisited entry // while (entries.Height() > 0) { BasicBlock* const block = entries.Pop(); - dfsFrom(block); + + if (!BitVecOps::IsMember(&traits, visited, block->bbNum)) + { + dfsFrom(block); + } } assert(preOrderIndex == postOrderIndex); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 351e6d5b5961b7..9e63d125989da5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2360,7 +2360,29 @@ void Compiler::optFindLoops() m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - assert(failedToModifyAll || (m_loops->ImproperLoopHeaders() == 0)); +#ifdef DEBUG + // If there are SCCs that pass through finallys, faults, or catches, we won't have remediated them above. + // These seem to be relatively rare, and won't represent unstructured flow in wasm, so we can ignore such cases. + // + // Note in the assert below we don't know for sure the unstructured flow goes + // through any EH construct, but it's a start. + // + bool hasEH = false; + for (EHblkDsc* const ehDsc : EHClauses(this)) + { + hasEH = true; + break; + } + + JITDUMP("Graph %s EH; %u improper headers; %s SCC transformations failed\n", hasEH ? "has" : "does not have", + m_loops->ImproperLoopHeaders(), failedToModifyAll ? "some" : "no"); + + // We currently think we handle all expected cases. This may change. 
assert(!failedToModifyAll);
+
+        // We should only see residual improper headers if there is EH
+        assert(hasEH || (m_loops->ImproperLoopHeaders() == 0));
+#endif
     }

     optCompactLoops();

From aa573b50d73161d2c5052fded7a88a789dd19fb9 Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Wed, 22 Oct 2025 18:18:57 -0700
Subject: [PATCH 12/13] catchret/filterret do not inspire scc entries

---
 src/coreclr/jit/compiler.hpp  | 37 +++++++++++----
 src/coreclr/jit/optimizer.cpp | 86 +++++++++++++----------------------
 2 files changed, 61 insertions(+), 62 deletions(-)

diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 307d2312a3e52d..725d9317aa89a0 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -5245,8 +5245,8 @@ unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder,

         while (!blocks.Empty())
         {
-            BasicBlock* block = blocks.TopRef().Block();
-            BasicBlock* succ = blocks.TopRef().NextSuccessor();
+            BasicBlock* const block = blocks.TopRef().Block();
+            BasicBlock* const succ = blocks.TopRef().NextSuccessor();

             if (succ != nullptr)
             {
@@ -5254,19 +5254,40 @@ unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder,
                 {
                     if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum))
                     {
-                        JITDUMP("  visiting " FMT_BB "\n", succ->bbNum);
                         blocks.Emplace(this, succ, useProfile);
                         visitPreorder(succ, preOrderIndex++);
                     }

                     visitEdge(block, succ);
                 }
+
+                // If this is a callfinally but the finally itself is not
+                // in the subset, summarize the flow to the pair tail.
+                //
+                // Note a callfinally will only have one successor, so
+                // we don't need extra conditions here. 
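+                // (This mirrors the pred-side handling in optFindSCCs,
+                // which steps from a callfinally pair tail back across
+                // the finally to its callfinally.)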
From aa573b50d73161d2c5052fded7a88a789dd19fb9 Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Wed, 22 Oct 2025 18:18:57 -0700
Subject: [PATCH 12/13] catchret/filterret do not inspire scc entries

---
 src/coreclr/jit/compiler.hpp | 37 +++++++++++----
 src/coreclr/jit/optimizer.cpp | 86 +++++++++++++----------------------
 2 files changed, 61 insertions(+), 62 deletions(-)

diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 307d2312a3e52d..725d9317aa89a0 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -5245,8 +5245,8 @@ unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder,
 while (!blocks.Empty())
 {
- BasicBlock* block = blocks.TopRef().Block();
- BasicBlock* succ = blocks.TopRef().NextSuccessor();
+ BasicBlock* const block = blocks.TopRef().Block();
+ BasicBlock* const succ = blocks.TopRef().NextSuccessor();

 if (succ != nullptr)
 {
@@ -5254,19 +5254,40 @@ unsigned Compiler::fgRunSubgraphDfs(VisitPreorder visitPreorder,
 {
 if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum))
 {
- JITDUMP(" visiting " FMT_BB "\n", succ->bbNum);
 blocks.Emplace(this, succ, useProfile);
 visitPreorder(succ, preOrderIndex++);
 }

 visitEdge(block, succ);
 }
+
+ // If this is a callfinally but the finally itself is not
+ // in the subset, summarize the flow to the pair tail.
+ //
+ // Note a callfinally will only have one successor, so
+ // we don't need extra conditions here.
+ //
+ else if (block->isBBCallFinallyPair())
+ {
+ BasicBlock* const callFinallyRet = block->Next();
+
+ if (BitVecOps::IsMember(&subgraphTraits, subgraph, callFinallyRet->bbPostorderNum))
+ {
+ if (BitVecOps::TryAddElemD(&traits, visited, callFinallyRet->bbNum))
+ {
+ blocks.Emplace(this, callFinallyRet, useProfile);
+ visitPreorder(callFinallyRet, preOrderIndex++);
+ }
+
+ visitEdge(block, callFinallyRet);
+ }
+ }
+
+ continue;
 }
- else
- {
- blocks.Pop();
- visitPostorder(block, postOrderIndex++);
- }
+
+ blocks.Pop();
+ visitPostorder(block, postOrderIndex++);
 }
 };

diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 9e63d125989da5..313c1dfb6bd3f4 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -2970,8 +2970,16 @@ class SCC
 continue;
 }

+ bool isEntry = false;
+
 for (BasicBlock* pred : block->PredBlocks())
 {
+ if (pred->KindIs(BBJ_EHCATCHRET, BBJ_EHFILTERRET))
+ {
+ // Ignore EHCATCHRET/EHFILTERRET preds (these require exceptional flow)
+ continue;
+ }
+
 if (BitVecOps::IsMember(&m_traits, m_blocks, pred->bbPostorderNum))
 {
 // Pred is in the scc, so not an entry edge
@@ -2981,6 +2989,7 @@ class SCC
 if (BitVecOps::TryAddElemD(&m_traits, m_entries, block->bbPostorderNum))
 {
 JITDUMP(FMT_BB " is scc entry\n", block->bbNum);
+ isEntry = true;

 m_entryWeight += block->bbWeight;

@@ -3065,11 +3074,11 @@ class SCC

 if (NumEntries() > 1)
 {
- JITDUMP("[irrd] ");
+ JITDUMP("[irrd (%u)] ", NumBlocks());
 }
 else
 {
- JITDUMP("[loop] ");
+ JITDUMP("[loop (%u)] ", NumBlocks());
 }
 }
 else
@@ -3097,10 +3106,13 @@ class SCC

 void FindNested()
 {
- ArrayStack nestedSccs(m_comp->getAllocator(CMK_DepthFirstSearch));
- BitVec nestedBlocks = InternalBlocks();
- unsigned nestedCount = BitVecOps::Count(&m_traits, nestedBlocks);
+ unsigned entryCount = BitVecOps::Count(&m_traits, m_entries);
+ assert(entryCount > 0);

+ BitVec nestedBlocks = InternalBlocks();
+ unsigned nestedCount = BitVecOps::Count(&m_traits, nestedBlocks);
+
+ // Only entries, so nothing can be nested
 if (nestedCount == 0) // < 2...?
 {
 return;
@@ -3108,6 +3120,8 @@ class SCC

 JITDUMP("\n --> nested %u blocks... \n", BitVecOps::Count(&m_traits, nestedBlocks));

+ ArrayStack nestedSccs(m_comp->getAllocator(CMK_DepthFirstSearch));
+
 // Build a new postorder for the nested blocks
 //
 BasicBlock** postOrder = new (m_comp, CMK_DepthFirstSearch) BasicBlock*[nestedCount];
@@ -3129,14 +3143,7 @@ class SCC
 {
 JITDUMP("Eh? numBlocks %u nestedCount %u\n", numBlocks, nestedCount);
 }
- // assert(numBlocks == nestedCount);
-
- JITDUMP("[dfs done, postorder is]\n");
- for (unsigned i = 0; i < nestedCount; i++)
- {
- JITDUMP(FMT_BB " ", postOrder[i]->bbNum);
- }
- JITDUMP("\n");
+ assert(numBlocks == nestedCount);

 // Use that to find the nested SCCs
 //
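The reworked FindNested keeps the same overall recipe: peel the entry blocks off the SCC, rerun SCC discovery over the internal blocks, and recurse into whatever nontrivial components remain. A rough sketch of that recursion follows; it is illustrative only, with a plain set standing in for the JIT's bit vectors and the component finder passed as a callback rather than wired to optFindSCCs.

    #include <functional>
    #include <set>
    #include <vector>

    // Sketch of nested-SCC refinement. An Scc is its block set plus the
    // subset of blocks with predecessors outside the set (its entries).
    struct Scc
    {
        std::set<int> blocks;
        std::set<int> entries;
    };

    using SccFinder = std::function<std::vector<Scc>(const std::set<int>&)>;

    void findNested(const Scc& scc, const SccFinder& findSccs, std::vector<Scc>& out)
    {
        // Internal blocks = blocks minus entries.
        std::set<int> internal;
        for (int block : scc.blocks)
        {
            if (scc.entries.count(block) == 0)
            {
                internal.insert(block);
            }
        }

        // Only entries: nothing can be nested.
        if (internal.empty())
        {
            return;
        }

        // Any SCC found among the internal blocks is a nested cycle;
        // recurse to build the full nesting forest.
        for (const Scc& nested : findSccs(internal))
        {
            out.push_back(nested);
            findNested(nested, findSccs, out);
        }
    }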
@@ -3199,28 +3206,22 @@ class SCC
 JITDUMP("Transforming SCC via switch dispatch: ");
 JITDUMPEXEC(Dump());

- // if (HasOSREntryHeaderInTry())
- // {
- // JITDUMP(" cannot transform: header " FMT_BB " is OSR entry in a try region\n",
- // m_comp->fgOSREntryBB->bbNum);
- // failedToModify = true;
- // return false;
- // }
+ // We're making changes
+ //
+ modified = true;

- modified = true;
+ // Split edges, rewire flow, and add control var assignments
+ //
 const unsigned controlVarNum = m_comp->lvaGrabTemp(/* shortLifetime */ false DEBUGARG("SCC control var"));
 LclVarDsc* const controlVarDsc = m_comp->lvaGetDesc(controlVarNum);
 controlVarDsc->lvType = TYP_INT;
 BasicBlock* dispatcher = nullptr;
 FlowEdge** const succs = new (m_comp, CMK_FlowEdge) FlowEdge*[numHeaders];
 FlowEdge** const cases = new (m_comp, CMK_FlowEdge) FlowEdge*[numHeaders];
-
- // Split edges, rewire flow, and add control var assignments
- //
- unsigned headerNumber = 0;
- BitVecOps::Iter iterator(&m_traits, m_entries);
- unsigned int poHeaderNumber = 0;
- weight_t netLikelihood = 0.0;
+ unsigned headerNumber = 0;
+ BitVecOps::Iter iterator(&m_traits, m_entries);
+ unsigned int poHeaderNumber = 0;
+ weight_t netLikelihood = 0.0;

 while (iterator.NextElem(&poHeaderNumber))
 {
@@ -3254,8 +3255,7 @@ class SCC
 for (FlowEdge* const f : header->PredEdgesEditing())
 {
 assert(f->getDestinationBlock() == header);
- BasicBlock* const pred = f->getSourceBlock();
- JITDUMP(" Splitting edge " FMT_BB " -> " FMT_BB "\n", pred->bbNum, header->bbNum);
+ BasicBlock* const pred = f->getSourceBlock();
 BasicBlock* const transferBlock = m_comp->fgSplitEdge(pred, header);
 GenTree* const targetIndex = m_comp->gtNewIconNode(headerNumber);
 GenTree* const storeControlVar = m_comp->gtNewStoreLclVarNode(controlVarNum, targetIndex);
@@ -3264,24 +3264,7 @@ class SCC
 m_comp->fgReplaceJumpTarget(transferBlock, header, dispatcher);
 }

- // If the header is the OSR entry, and the dispatcher is in an enclosing
- // try, we must branch there in a more complicated way...
- //
- // something like: create a new block in the same region as the dispatcher.
- // In that block set the OSR control var for the OSR entry (0), and set the
- // control vars for each enclosing try that is an SCC header (already handled)
- // and then branch to the outermost enclosing try's header.
- //
- FlowEdge* dispatchToHeaderEdge = nullptr;
- // if ((header == m_comp->fgOSREntryBB) && (header->bbTryIndex !=
- // dispatcher->bbTryIndex))
- // {
- // assert(!"Transforming SCC with OSR entry in try region not implemented");
- // }
- // else
- {
- dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher);
- }
+ FlowEdge* const dispatchToHeaderEdge = m_comp->fgAddRefPred(header, dispatcher);

 // Since all flow to header now goes through dispatch, we know the likelihood
 // of the dispatch targets. If all profile data is zero just divide evenly.
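To make that likelihood bookkeeping concrete: each header's dispatch edge receives a share proportional to its entry weight, with an even split as the zero-profile fallback, and the last header takes the remainder, clamped at zero so accumulated floating point error in netLikelihood cannot produce a negative likelihood (the max(0.0, ...) change in the first hunk above). A sketch of the arithmetic outside the JIT's types, with invented names:

    #include <algorithm>
    #include <vector>

    // Sketch: likelihoods for N dispatch targets from their entry weights.
    // Falls back to an even split when all profile weights are zero; the
    // last target absorbs the residue, clamped so it is never negative.
    std::vector<double> dispatchLikelihoods(const std::vector<double>& entryWeights)
    {
        const size_t numHeaders = entryWeights.size();
        double totalWeight = 0.0;
        for (double w : entryWeights)
        {
            totalWeight += w;
        }

        std::vector<double> likelihoods(numHeaders, 0.0);
        double netLikelihood = 0.0;

        for (size_t i = 0; i < numHeaders; i++)
        {
            if ((i + 1) == numHeaders)
            {
                // Last header: take the remainder, never negative.
                likelihoods[i] = std::max(0.0, 1.0 - netLikelihood);
            }
            else
            {
                likelihoods[i] = (totalWeight > 0) ? (entryWeights[i] / totalWeight) : (1.0 / numHeaders);
                netLikelihood += likelihoods[i];
            }
        }

        return likelihoods;
    }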
@@ -3461,7 +3444,6 @@ void Compiler::optFindSCCs(
 sccs.Push(scc);
 }

- // JITDUMP("Adding " FMT_BB " to scc with root " FMT_BB "\n", u->bbNum, root->bbNum);
 scc->Add(u);
 }

@@ -3475,7 +3457,6 @@
 //
 if (bbIsHandlerBeg(u))
 {
- // JITDUMP("hdlr " FMT_BB ", skipping all preds\n");
 return;
 }

@@ -3484,7 +3465,6 @@
 //
 if (u->isBBCallFinallyPairTail())
 {
- JITDUMP("cfpt " FMT_BB ", advancing to " FMT_BB "\n", u->bbNum, u->Prev()->bbNum);
 self(self, u->Prev(), root, subset);
 return;
 }

@@ -3495,9 +3475,8 @@
 {
 // Do not walk back into a catch or filter.
 //
- if (p->KindIs(BBJ_EHCATCHRET))
+ if (p->KindIs(BBJ_EHCATCHRET, BBJ_EHFILTERRET))
 {
- // JITDUMP("c-cont " FMT_BB ", skipping " FMT_BB "\n", u->bbNum, u->Prev()->bbNum);
 continue;
 }

@@ -3517,7 +3496,6 @@
 continue;
 }

- // JITDUMP("Top-level assign: " FMT_BB "\n", block->bbNum);
 assign(assign, block, block, subset);
 }

From 2e58c4892c94c4e1ace2e168026c140ed8eb6a45 Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Thu, 23 Oct 2025 14:47:28 -0700
Subject: [PATCH 13/13] fix linux build

---
 src/coreclr/jit/optimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 313c1dfb6bd3f4..165398d0ec3772 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -3230,7 +3230,7 @@ class SCC
 {
 if ((EnclosingTryIndex() > 0) || (EnclosingHndIndex() > 0))
 {
- const bool inTry = (EnclosingTryIndex() != 0) && (EnclosingHndIndex() == 0) ||
+ const bool inTry = ((EnclosingTryIndex() != 0) && (EnclosingHndIndex() == 0)) ||
 (EnclosingTryIndex() < EnclosingHndIndex());
 if (inTry)
 {
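The final fix is operator precedence hygiene: && binds tighter than ||, so the expression already parsed the intended way, but GCC and Clang diagnose unparenthesized mixes of the two operators (-Wparentheses), and the Linux build promotes warnings to errors. A reduced illustration with hypothetical names:

    // && binds tighter than ||, so these are the same parse:
    //
    //   (a != 0) && (b == 0) || (a < b)      // warns under -Wparentheses
    //   ((a != 0) && (b == 0)) || (a < b)    // warning-clean
    //
    // With warnings promoted to errors, only the second form builds.
    bool inTry(unsigned tryIndex, unsigned hndIndex)
    {
        return ((tryIndex != 0) && (hndIndex == 0)) || (tryIndex < hndIndex);
    }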