diff --git a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp index fd5b51a197cb3a..64277986606f53 100644 --- a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp +++ b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp @@ -330,8 +330,11 @@ static Task *getTapirLoopForStripMining(const Loop *L, TaskInfo &TI, } // TODO: Generalize this condition to support stripmining with a prolog. - assert(isEpilogProfitable(L) && - "Stripmining loop with unprofitable epilog."); +#ifndef NDEBUG + if (!isEpilogProfitable(L)) { + dbgs() << "Stripmining loop with unprofitable epilog.\n"; + } +#endif // Get the task for this loop. return T; @@ -417,7 +420,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, - std::vector &ExtraTaskBlocks, + SmallVectorImpl &ExtraTaskBlocks, + SmallVectorImpl &SharedEHTaskBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); @@ -496,26 +500,61 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, addClonedBlockToLoopInfo(BB, NewBB, LI, NewLoops); VMap[BB] = NewBB; - if (DT) { - // Copy information from original loop to the clone. - BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - if (VMap.lookup(IDomBB)) - DT->addNewBlock(NewBB, cast(VMap[IDomBB])); - else - DT->addNewBlock(NewBB, cast(IDomBB)); - } // Update PHI nodes in the detach-unwind destination. Strictly speaking, // this step isn't necessary, since the epilog loop will be serialized later // and these new entries for the PHI nodes will therefore be removed. But // the routine for serializing the detach expects valid LLVM, so we update // the PHI nodes here to ensure the resulting LLVM is valid. - if (DI->hasUnwindDest()) + if (DI->hasUnwindDest()) { if (isDetachedRethrow(BB->getTerminator(), DI->getSyncRegion())) { InvokeInst *DR = dyn_cast(BB->getTerminator()); for (PHINode &PN : DR->getUnwindDest()->phis()) PN.addIncoming(PN.getIncomingValueForBlock(BB), NewBB); } + } + } + + // Update PHI nodes in successors of ExtraTaskBlocks, based on the cloned + // values. + for (BasicBlock *BB : ExtraTaskBlocks) { + for (BasicBlock *Succ : successors(BB)) { + if (VMap.count(Succ)) + continue; + + for (PHINode &PN : Succ->phis()) { + Value *Val = PN.getIncomingValueForBlock(BB); + Value *NewVal = VMap.count(Val) ? cast(VMap[Val]) : Val; + PN.addIncoming(NewVal, cast(VMap[BB])); + } + } + } + + // Update DT to accommodate cloned ExtraTaskBlocks. + if (DT) { + for (BasicBlock *BB : ExtraTaskBlocks) { + BasicBlock *NewBB = cast(VMap[BB]); + // Copy information from original loop to the clone, if it's available. + BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + if (VMap.count(IDomBB)) { + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } else { + BasicBlock *NewIDom = nullptr; + // Get the idom of BB's predecessors. + for (BasicBlock *Pred : predecessors(BB)) + if (VMap.count(Pred)) { + if (NewIDom) + NewIDom = DT->findNearestCommonDominator(NewIDom, Pred); + else + NewIDom = Pred; + } + // Use this computed idom (or its clone) as the idom of the cloned BB. + if (VMap.count(NewIDom)) + DT->addNewBlock(NewBB, cast(VMap[NewIDom])); + else + DT->addNewBlock(NewBB, NewIDom); + } + } } // Change the incoming values to the ones defined in the preheader or @@ -542,6 +581,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, NewPHI->setIncomingValue(idx, V); } } + // Add entries to PHI nodes outside of loop. Strictly speaking, this step // isn't necessary, since the epilog loop will be serialized later and these // new entries for the PHI nodes will therefore be removed. But the routine @@ -930,13 +970,35 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, // Collect extra blocks in the task that LoopInfo does not consider to be part // of the loop, e.g., exception-handling code for the task. - std::vector ExtraTaskBlocks; - for (Task *SubT : depth_first(T)) - for (Spindle *S : depth_first>(SubT->getEntrySpindle())) - for (BasicBlock *BB : S->blocks()) - // Skip blocks in the loop. - if (!L->contains(BB)) - ExtraTaskBlocks.push_back(BB); + SmallVector ExtraTaskBlocks; + SmallVector SharedEHTaskBlocks; + SmallPtrSet SharedEHBlockPreds; + { + SmallPtrSet Visited; + for (Task *SubT : depth_first(T)) { + for (Spindle *S : + depth_first>(SubT->getEntrySpindle())) { + // Only visit shared-eh spindles once a piece. + if (S->isSharedEH() && !Visited.insert(S).second) + continue; + + for (BasicBlock *BB : S->blocks()) { + // Skip blocks in the loop. + if (!L->contains(BB)) { + ExtraTaskBlocks.push_back(BB); + + if (!T->simplyEncloses(BB) && S->isSharedEH()) { + SharedEHTaskBlocks.push_back(BB); + if (S->getEntry() == BB) + for (BasicBlock *Pred : predecessors(BB)) + if (T->simplyEncloses(Pred)) + SharedEHBlockPreds.insert(Pred); + } + } + } + } + } + } SmallVector Reattaches; SmallVector EHBlocksToClone; @@ -978,7 +1040,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, *RemainderLoop = CloneLoopBlocks(L, ModVal, CreateRemainderLoop, true, UnrollRemainder, InsertTop, InsertBot, NewPreheader, NewBlocks, LoopBlocks, - ExtraTaskBlocks, VMap, DT, LI); + ExtraTaskBlocks, SharedEHTaskBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -1017,6 +1079,9 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, SmallPtrSet ClonedEHBlockPreds; for (BasicBlock *B : EHBlockPreds) ClonedEHBlockPreds.insert(cast(VMap[B])); + SmallVector ClonedEHBlocks; + for (BasicBlock *B : EHBlocksToClone) + ClonedEHBlocks.push_back(cast(VMap[B])); // Landing pads and detached-rethrow instructions may or may not have been // cloned. SmallPtrSet ClonedInlinedLPads; @@ -1036,8 +1101,8 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, DetachInst *ClonedDI = cast(VMap[DI]); // Serialize the new task. SerializeDetach(ClonedDI, ParentEntry, EHCont, EHContLPadVal, - ClonedReattaches, &EHBlocksToClone, &ClonedEHBlockPreds, - &ClonedInlinedLPads, &ClonedDetachedRethrows, DT); + ClonedReattaches, &ClonedEHBlocks, &ClonedEHBlockPreds, + &ClonedInlinedLPads, &ClonedDetachedRethrows, DT, LI); } // Detach the stripmined loop. @@ -1070,6 +1135,12 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopReattach = SplitEdge(Latch, NewExit, DT, LI); LoopReattach->setName(Header->getName() + ".strpm.detachloop.reattach"); + // Clone any shared-EH spindles in the stripmined loop to prevent tasks at + // different nesting levels from sharing an EH spindle. + if (!SharedEHTaskBlocks.empty()) + cloneEHBlocks(F, SharedEHTaskBlocks, SharedEHBlockPreds, ".strpm", + nullptr, nullptr, DT, LI); + // Insert new detach instructions if (DI->hasUnwindDest()) { // Insert a detach instruction to detach the stripmined loop. We do this @@ -1481,7 +1552,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, if (TI) // FIXME: Recalculating TaskInfo for the whole function is wasteful. // Optimize this routine in the future. - TI->recalculate(*Header->getParent(), *DT); + TI->recalculate(*F, *DT); return NewLoop; } diff --git a/llvm/test/Transforms/Tapir/loop-stripmine-clone-sharedeh.ll b/llvm/test/Transforms/Tapir/loop-stripmine-clone-sharedeh.ll new file mode 100644 index 00000000000000..f54f9185b13530 --- /dev/null +++ b/llvm/test/Transforms/Tapir/loop-stripmine-clone-sharedeh.ll @@ -0,0 +1,551 @@ +; RUN: opt < %s -loop-stripmine -S -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OLD +; RUN: opt < %s -passes='loop-stripmine' -S -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEW + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.CFVertexData.23.185.333.483.633.1389.2145.3117.4089.5061.6465.7005.7977.9165.10137.10677.11001.11757.12513.13269.13809.14565.14889.15861.16617.17373.18129.19101.19641.20181.21477.21585.22125.22233.22341.22449.22773.23529.23853.23961.24177.24933.25149.25473.26985.27951.29673.30535.31506.31614.33989.34421.35499.36363.37011.39603.56140 = type { [2 x double] } +%class.CFGlobalInfo.69.230.378.528.678.1434.2190.3162.4134.5106.6510.7050.8022.9210.10182.10722.11046.11802.12558.13314.13854.14610.14934.15906.16662.17418.18174.19146.19686.20226.21522.21630.22170.22278.22386.22494.22818.23574.23898.24006.24222.24978.25194.25518.27030.27996.29718.30580.31551.31659.34034.34466.35544.36408.37056.39648.56141 = type <{ i32, [4 x i8], [4 x double], double, double, i8, [7 x i8], i64, i8*, i32, [4 x i8] }> + +$_ZN22GraphBoltEngineComplexI16asymmetricVertex23CFVertexAggregationData12CFVertexData12CFGlobalInfoE12deltaComputeER9edgeArrayS6_ = comdat any + +; Function Attrs: argmemonly nounwind willreturn +declare token @llvm.syncregion.start() #0 + +define dso_local void @_ZN22GraphBoltEngineComplexI16asymmetricVertex23CFVertexAggregationData12CFVertexData12CFGlobalInfoE12deltaComputeER9edgeArrayS6_() unnamed_addr #1 comdat align 2 { +entry: + %syncreg.i = tail call token @llvm.syncregion.start() + detach within %syncreg.i, label %pfor.body61, label %pfor.inc182 + +pfor.body61: ; preds = %entry + br label %cond.true.i.i + +cond.true.i.i: ; preds = %pfor.body61 + unreachable + +pfor.inc182: ; preds = %entry + sync within %syncreg.i, label %sync.continue187 + +sync.continue187: ; preds = %pfor.inc182 + br label %cleanup190 + +cleanup190: ; preds = %sync.continue187 + br i1 undef, label %pfor.cond206.preheader, label %cleanup352 + +pfor.cond206.preheader: ; preds = %cleanup190 + detach within %syncreg.i, label %pfor.body212, label %pfor.inc344 + +pfor.body212: ; preds = %pfor.cond206.preheader + br label %cond.true.i.i444 + +cond.true.i.i444: ; preds = %pfor.body212 + unreachable + +pfor.inc344: ; preds = %pfor.cond206.preheader + sync within %syncreg.i, label %sync.continue349 + +sync.continue349: ; preds = %pfor.inc344 + unreachable + +cleanup352: ; preds = %cleanup190 + br label %if.end361 + +if.end361: ; preds = %cleanup352 + br label %if.end369 + +if.end369: ; preds = %if.end361 + br i1 undef, label %cleanup427.thread, label %if.then380 + +if.then380: ; preds = %if.end369 + br label %_ZN5timer4nextEv.exit684 + +cleanup427.thread: ; preds = %if.end369 + br label %cleanup1350 + +_ZN5timer4nextEv.exit684: ; preds = %if.then380 + br i1 undef, label %if.then434, label %if.end492 + +if.then434: ; preds = %_ZN5timer4nextEv.exit684 + unreachable + +if.end492: ; preds = %_ZN5timer4nextEv.exit684 + br label %cleanup843 + +cleanup843: ; preds = %if.end492 + br label %_ZN5timer4nextEv.exit871 + +_ZN5timer4nextEv.exit871: ; preds = %cleanup843 + br i1 undef, label %cleanup1057, label %pfor.cond864.preheader + +pfor.cond864.preheader: ; preds = %_ZN5timer4nextEv.exit871 + br i1 undef, label %pfor.cond864.us.preheader, label %pfor.cond864.preheader1564 + +pfor.cond864.preheader1564: ; preds = %pfor.cond864.preheader + br label %pfor.cond864 + +; CHECK: pfor.cond864.preheader1564: +; CHECK-NEXT: br i1 {{.+}}, label %pfor.cond.cleanup1052.[[LOOPEXIT:[a-z0-9]+]].strpm-lcssa, label %pfor.cond864.preheader1564.new + +; CHECK: pfor.cond864.preheader1564.new: +; CHECK-NEXT: br label %pfor.cond864.preheader1564.new.strpm.detachloop + +; CHECK: pfor.cond864.preheader1564.new.strpm.detachloop: +; CHECK-NEXT: detach within %syncreg.i, label %pfor.cond864.strpm.detachloop.entry, label %pfor.cond.cleanup1052.[[LOOPEXIT]].strpm-lcssa.loopexit + +pfor.cond864.us.preheader: ; preds = %pfor.cond864.preheader + br label %pfor.cond864.us + +; CHECK: pfor.cond864.us.preheader: +; CHECK-NEXT: br i1 {{.+}}, label %pfor.cond.cleanup1052.[[LOOPEXIT_US:[a-z0-9]+]].strpm-lcssa, label %pfor.cond864.us.preheader.new + +; CHECK: pfor.cond864.us.preheader.new: +; CHECK-NEXT: br label %pfor.cond864.us.preheader.new.strpm.detachloop + +; CHECK: pfor.cond864.us.preheader.new.strpm.detachloop: +; CHECK-NEXT: detach within %syncreg.i, label %pfor.cond864.us.strpm.detachloop.entry, label %pfor.cond.cleanup1052.[[LOOPEXIT_US]].strpm-lcssa.loopexit + +; CHECK: pfor.cond864.us.strpm.detachloop.entry: +; CHECK-NEXT: %[[SYNCREG_US_DETLOOP:.+]] = call token @llvm.syncregion.start() +; CHECK-NEXT: br label %pfor.cond864.us.strpm.outer + +; CHECK: pfor.cond864.us.strpm.outer: +; CHECK-NEXT: %[[NITER_US:.+]] = phi i64 +; CHECK-NEXT: detach within %[[SYNCREG_US_DETLOOP]], label %pfor.body870.us.strpm.outer, label %pfor.inc1049.us.strpm.outer + +; CHECK: pfor.body870.us.strpm.outer: +; CHECK-NEXT: %inverse_component_with_lamda.i.us = alloca [4 x double] +; CHECK-NEXT: %inverse_component_final.i.us = alloca [4 x double] +; CHECK-NEXT: %new_value.us = alloca +; CHECK-NEXT: mul i64 {{[0-9]+}}, %[[NITER_US]] +; CHECK-NEXT: br label %pfor.cond864.us + +pfor.cond864.us: ; preds = %pfor.inc1049.us, %pfor.cond864.us.preheader + %indvars.iv1734 = phi i64 [ 0, %pfor.cond864.us.preheader ], [ %indvars.iv.next1735, %pfor.inc1049.us ] + %indvars.iv.next1735 = add nuw nsw i64 %indvars.iv1734, 1 + detach within %syncreg.i, label %pfor.body870.us, label %pfor.inc1049.us + +; CHECK: pfor.cond864.us: +; CHECK-NEXT: %indvars.iv1734 = phi i64 +; CHECK-NEXT: phi i64 +; CHECK-NEXT: %indvars.iv.next1735 = add nuw nsw i64 %indvars.iv1734, 1 +; CHECK-NOT: detach +; CHECK-NEXT: br label %pfor.body870.us + +pfor.body870.us: ; preds = %pfor.cond864.us + %inverse_component_with_lamda.i.us = alloca [4 x double], align 16 + %inverse_component_final.i.us = alloca [4 x double], align 16 + %new_value.us = alloca %class.CFVertexData.23.185.333.483.633.1389.2145.3117.4089.5061.6465.7005.7977.9165.10137.10677.11001.11757.12513.13269.13809.14565.14889.15861.16617.17373.18129.19101.19641.20181.21477.21585.22125.22233.22341.22449.22773.23529.23853.23961.24177.24933.25149.25473.26985.27951.29673.30535.31506.31614.33989.34421.35499.36363.37011.39603.56140, align 8 + %cmp873.not.us = icmp sgt i64 undef, %indvars.iv1734 + br i1 %cmp873.not.us, label %if.end888.us, label %land.lhs.true874.us + +; CHECK: pfor.body870.us: +; CHECK-NOT: alloca +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: br i1 {{.+}}, label %if.end888.us, label %land.lhs.true874.us + +land.lhs.true874.us: ; preds = %pfor.body870.us + %0 = load i8*, i8** undef, align 8 + %arrayidx877.us = getelementptr inbounds i8, i8* %0, i64 %indvars.iv1734 + %1 = load i8, i8* %arrayidx877.us, align 1 + %cmp880.us = icmp eq i8 %1, 0 + br label %if.then881.us + +if.then881.us: ; preds = %land.lhs.true874.us + %2 = load %class.CFGlobalInfo.69.230.378.528.678.1434.2190.3162.4134.5106.6510.7050.8022.9210.10182.10722.11046.11802.12558.13314.13854.14610.14934.15906.16662.17418.18174.19146.19686.20226.21522.21630.22170.22278.22386.22494.22818.23574.23898.24006.24222.24978.25194.25518.27030.27996.29718.30580.31551.31659.34034.34466.35544.36408.37056.39648.56141*, %class.CFGlobalInfo.69.230.378.528.678.1434.2190.3162.4134.5106.6510.7050.8022.9210.10182.10722.11046.11802.12558.13314.13854.14610.14934.15906.16662.17418.18174.19146.19686.20226.21522.21630.22170.22278.22386.22494.22818.23574.23898.24006.24222.24978.25194.25518.27030.27996.29718.30580.31551.31659.34034.34466.35544.36408.37056.39648.56141** undef, align 8 + %3 = load i32, i32* undef, align 8 + %cmp.i.i830.us = icmp ult i64 %indvars.iv1734, undef + br i1 %cmp.i.i830.us, label %cond.true.i.i835.us, label %cond.false.i.i837.us + +cond.false.i.i837.us: ; preds = %if.then881.us + %rem.i.i836.us = and i32 undef, 1 + br label %_ZNK12CFGlobalInfo19belongsToPartition1Ej.exit.i840.us + +cond.true.i.i835.us: ; preds = %if.then881.us + %4 = load i8*, i8** undef, align 8 + %5 = zext i8 undef to i32 + br label %_ZNK12CFGlobalInfo19belongsToPartition1Ej.exit.i840.us + +_ZNK12CFGlobalInfo19belongsToPartition1Ej.exit.i840.us: ; preds = %cond.true.i.i835.us, %cond.false.i.i837.us + %tobool2.i.not.i839.us = icmp ne i32 undef, 0 + %spec.select = zext i1 %tobool2.i.not.i839.us to i8 + store i8 %spec.select, i8* %arrayidx877.us, align 1 + br label %if.end888.us + +if.end888.us: ; preds = %_ZNK12CFGlobalInfo19belongsToPartition1Ej.exit.i840.us, %pfor.body870.us + %6 = load i8, i8* undef, align 1 + %tobool892.not.us = icmp eq i8 %6, 0 + br i1 %tobool892.not.us, label %pfor.preattach1048.us, label %if.then893 + +; CHECK: if.end888.us: +; CHECK: br i1 %tobool892.not.us, label %pfor.preattach1048.us, label %if.then893.loopexit + +pfor.preattach1048.us: ; preds = %if.end888.us + reattach within %syncreg.i, label %pfor.inc1049.us + +; CHECK: pfor.preattach1048.us: +; CHECK-NOT: reattach +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: br label %pfor.inc1049.us + +pfor.inc1049.us: ; preds = %pfor.preattach1048.us, %pfor.cond864.us + %exitcond1737.not = icmp eq i64 %indvars.iv.next1735, undef + br i1 %exitcond1737.not, label %pfor.cond.cleanup1052, label %pfor.cond864.us + +; CHECK: pfor.inc1049.us: +; CHECK: br i1 %{{.+}}, label %[[PFOR_INC1049_US_REATTACH:.+]], label %pfor.cond864.us + +; CHECK: [[PFOR_INC1049_US_REATTACH]]: +; CHECK-NEXT: reattach within %[[SYNCREG_US_DETLOOP]], label %pfor.inc1049.us.strpm.outer + +; CHECK: pfor.inc1049.us.strpm.outer: +; CHECK-NEXT: add nuw nsw i64 %[[NITER_US]], 1 +; CHECK: br i1 {{.+}}, label %pfor.cond864.us.strpm.detachloop.sync, label %pfor.cond864.us.strpm.outer + +; CHECK: pfor.cond864.us.strpm.detachloop.sync: +; CHECK: sync within %[[SYNCREG_US_DETLOOP]], label %pfor.cond864.us.strpm.detachloop.reattach.split + +; CHECK: pfor.cond864.us.strpm.detachloop.reattach.split: +; CHECK: reattach within %syncreg.i, label %pfor.cond.cleanup1052.[[LOOPEXIT_US]].strpm-lcssa.loopexit + + +; CHECK: pfor.cond864.strpm.detachloop.entry: +; CHECK-NEXT: %[[SYNCREG_DETLOOP:.+]] = call token @llvm.syncregion.start() +; CHECK-NEXT: br label %pfor.cond864.strpm.outer + +; CHECK: pfor.cond864.strpm.outer: +; CHECK-NEXT: %[[NITER:.+]] = phi i64 +; CHECK-NEXT: detach within %[[SYNCREG_DETLOOP]], label %pfor.body870.strpm.outer, label %pfor.inc1049.strpm.outer + +; CHECK: pfor.body870.strpm.outer: +; CHECK-NEXT: %inverse_component_with_lamda.i = alloca [4 x double] +; CHECK-NEXT: %inverse_component_final.i = alloca [4 x double] +; CHECK-NEXT: %new_value = alloca +; CHECK-NEXT: mul i64 {{[0-9]+}}, %[[NITER]] +; CHECK-NEXT: br label %pfor.cond864 + +pfor.cond864: ; preds = %pfor.inc1049, %pfor.cond864.preheader1564 + %indvars.iv1730 = phi i64 [ 0, %pfor.cond864.preheader1564 ], [ %indvars.iv.next1731, %pfor.inc1049 ] + %indvars.iv.next1731 = add nuw nsw i64 %indvars.iv1730, 1 + detach within %syncreg.i, label %pfor.body870, label %pfor.inc1049 + +; CHECK: pfor.cond864: +; CHECK-NEXT: %indvars.iv1730 = phi i64 +; CHECK-NEXT: phi i64 +; CHECK-NEXT: %indvars.iv.next1731 = add nuw nsw i64 %indvars.iv1730, 1 +; CHECK-NEXT: br label %pfor.body870 + +pfor.body870: ; preds = %pfor.cond864 + %inverse_component_with_lamda.i = alloca [4 x double], align 16 + %inverse_component_final.i = alloca [4 x double], align 16 + %new_value = alloca %class.CFVertexData.23.185.333.483.633.1389.2145.3117.4089.5061.6465.7005.7977.9165.10137.10677.11001.11757.12513.13269.13809.14565.14889.15861.16617.17373.18129.19101.19641.20181.21477.21585.22125.22233.22341.22449.22773.23529.23853.23961.24177.24933.25149.25473.26985.27951.29673.30535.31506.31614.33989.34421.35499.36363.37011.39603.56140, align 8 + %cmp873.not = icmp sgt i64 undef, %indvars.iv1730 + br i1 %cmp873.not, label %if.end888, label %land.lhs.true874 + +; CHECK: pfor.body870: +; CHECK-NOT: alloca +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: br i1 {{.+}}, label %if.end888, label %land.lhs.true874 + +land.lhs.true874: ; preds = %pfor.body870 + %7 = load i8*, i8** undef, align 8 + %arrayidx877 = getelementptr inbounds i8, i8* %7, i64 %indvars.iv1730 + %8 = load i8, i8* %arrayidx877, align 1 + %cmp880 = icmp eq i8 %8, 0 + %9 = load %class.CFGlobalInfo.69.230.378.528.678.1434.2190.3162.4134.5106.6510.7050.8022.9210.10182.10722.11046.11802.12558.13314.13854.14610.14934.15906.16662.17418.18174.19146.19686.20226.21522.21630.22170.22278.22386.22494.22818.23574.23898.24006.24222.24978.25194.25518.27030.27996.29718.30580.31551.31659.34034.34466.35544.36408.37056.39648.56141*, %class.CFGlobalInfo.69.230.378.528.678.1434.2190.3162.4134.5106.6510.7050.8022.9210.10182.10722.11046.11802.12558.13314.13854.14610.14934.15906.16662.17418.18174.19146.19686.20226.21522.21630.22170.22278.22386.22494.22818.23574.23898.24006.24222.24978.25194.25518.27030.27996.29718.30580.31551.31659.34034.34466.35544.36408.37056.39648.56141** undef, align 8 + br label %land.lhs.true2.i843 + +land.lhs.true2.i843: ; preds = %land.lhs.true874 + %10 = load i32, i32* undef, align 8 + %cmp.i5.i842 = icmp ult i64 %indvars.iv1730, undef + br i1 %cmp.i5.i842, label %cond.true.i9.i848, label %_ZNK12CFGlobalInfo19belongsToPartition2Ej.exit.i851 + +cond.true.i9.i848: ; preds = %land.lhs.true2.i843 + %11 = load i8*, i8** undef, align 8 + %12 = load i8, i8* undef, align 1 + %tobool.not.i.i847 = icmp eq i8 %12, 0 + br label %_Z30forceComputeVertexForIterationI12CFGlobalInfoEbRKjiRKT_.exit + +_ZNK12CFGlobalInfo19belongsToPartition2Ej.exit.i851: ; preds = %land.lhs.true2.i843 + %rem.i10.i8497 = and i64 %indvars.iv1730, 1 + %tobool2.not.i.i850 = icmp eq i64 %rem.i10.i8497, 0 + br label %_Z30forceComputeVertexForIterationI12CFGlobalInfoEbRKjiRKT_.exit + +_Z30forceComputeVertexForIterationI12CFGlobalInfoEbRKjiRKT_.exit: ; preds = %_ZNK12CFGlobalInfo19belongsToPartition2Ej.exit.i851, %cond.true.i9.i848 + store i8 1, i8* %arrayidx877, align 1 + br label %if.end888 + +if.end888: ; preds = %_Z30forceComputeVertexForIterationI12CFGlobalInfoEbRKjiRKT_.exit, %pfor.body870 + %13 = load i8, i8* undef, align 1 + %tobool892.not = icmp eq i8 %13, 0 + br i1 %tobool892.not, label %pfor.preattach1048, label %if.then893.loopexit1565 + +if.then893.loopexit1565: ; preds = %if.end888 + br label %if.then893 + +; CHECK: if.then893.loopexit1565: +; CHECK-NEXT: br label %{{if.then893|if.then893.strpm}} + +; CHECK: if.then893.loopexit: +; CHECK-NEXT: br label %{{if.then893|if.then893.strpm}} + +if.then893: ; preds = %if.then893.loopexit1565, %if.end888.us + switch i32 undef, label %if.end936 [ + i32 0, label %land.lhs.true.i784 + i32 1, label %land.lhs.true2.i795 + ] + +land.lhs.true.i784: ; preds = %if.then893 + unreachable + +land.lhs.true2.i795: ; preds = %if.then893 + unreachable + +if.end936: ; preds = %if.then893 + br label %for.cond.i767 + +for.cond.i767: ; preds = %for.cond.i767, %if.end936 + br label %for.cond.i767 + +pfor.preattach1048: ; preds = %if.end888 + reattach within %syncreg.i, label %pfor.inc1049 + +; CHECK: pfor.preattach1048: +; CHECK-NOT: reattach +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: br label %pfor.inc1049 + +pfor.inc1049: ; preds = %pfor.preattach1048, %pfor.cond864 + %exitcond1733.not = icmp eq i64 %indvars.iv.next1731, undef + br i1 %exitcond1733.not, label %pfor.cond.cleanup1052, label %pfor.cond864 + +; CHECK: pfor.inc1049: +; CHECK: br i1 {{.+}}, label %[[PFOR_INC1049_REATTACH:.+]], label %pfor.cond864 + +; CHECK: [[PFOR_INC1049_REATTACH]]: +; CHECK-NEXT: reattach within %[[SYNCREG_DETLOOP]], label %pfor.inc1049.strpm.outer + +; CHECK: pfor.inc1049.strpm.outer: +; CHECK-NEXT: add nuw nsw i64 %[[NITER]], 1 +; CHECK: br i1 {{.+}}, label %pfor.cond864.strpm.detachloop.sync, label %pfor.cond864.strpm.outer + +; CHECK: pfor.cond864.strpm.detachloop.sync: +; CHECK-NEXT: sync within %[[SYNCREG_DETLOOP]], label %pfor.cond864.strpm.detachloop.reattach.split + +; CHECK: pfor.cond864.strpm.detachloop.reattach.split: +; CHECK-NEXT: reattach within %syncreg.i, label %pfor.cond.cleanup1052.[[LOOPEXIT]].strpm-lcssa.loopexit + + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT_US]].strpm-lcssa.loopexit +; CHECK-NEXT: br label %pfor.cond.cleanup1052.[[LOOPEXIT_US]].strpm-lcssa + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT_US]].strpm-lcssa +; CHECK-NEXT: br i1 {{.+}}, label %pfor.cond864.us.epil.preheader, label %pfor.cond.cleanup1052.[[LOOPEXIT_US]] + +; CHECK: pfor.cond864.us.epil.preheader: +; CHECK-NEXT: br label %pfor.cond864.us.epil + +; CHECK: pfor.cond864.us.epil: +; CHECK-NEXT: %indvars.iv1734.epil = phi i64 +; CHECK-NEXT: phi i64 +; CHECK-NEXT: %indvars.iv.next1735.epil = add nuw nsw i64 %indvars.iv1734.epil, 1 +; CHECK: br label %pfor.body870.us.epil + +; CHECK: pfor.body870.us.epil: +; CHECK-NOT: alloca +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: br i1 {{.+}}, label %if.end888.us.epil, label %land.lhs.true874.us.epil + +; CHECK: if.end888.us.epil: +; CHECK: br i1 {{.+}}, label %pfor.preattach1048.us.epil, label %if.then893.loopexit.epil + +; CHECK: pfor.preattach1048.us.epil: +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: br label %pfor.inc1049.us.epil + +; CHECK: pfor.inc1049.us.epil: +; CHECK: br i1 {{.+}}, label %pfor.cond864.us.epil, label %pfor.cond.cleanup1052.[[LOOPEXIT_US]].epilog-lcssa + +; CHECK: if.then893.loopexit.epil: +; CHECK-NEXT: br label %if.then893.[[EPIL_US_SHAREDEH:.[a-z0-9.]+]] + + +; CHECK-NEW: if.then893.epil4: +; CHECK-NEW-NEXT: switch i32 {{.+}}, label %if.end936.epil7 [ +; CHECK-NEW-NEXT: i32 0, label %land.lhs.true.i784.epil6 +; CHECK-NEW-NEXT: i32 1, label %land.lhs.true2.i795.epil5 +; CHECK-NEW-NEXT: ] + +; CHECK-NEW: if.end936.epil7: +; CHECK-NEW-NEXT: br label %for.cond.i767.epil8 + +; CHECK-NEW: for.cond.i767.epil8: +; CHECK-NEW-NEXT: br label %for.cond.i767.epil8 + +; CHECK-OLD: if.then893.epil: +; CHECK-OLD-NEXT: switch i32 {{.+}}, label %if.end936.epil [ +; CHECK-OLD-NEXT: i32 0, label %land.lhs.true.i784.epil +; CHECK-OLD-NEXT: i32 1, label %land.lhs.true2.i795.epil +; CHECK-OLD-NEXT: ] + +; CHECK-OLD: if.end936.epil: +; CHECK-OLD-NEXT: br label %for.cond.i767.epil + +; CHECK-OLD: for.cond.i767.epil: +; CHECK-OLD-NEXT: br label %for.cond.i767.epil + + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT_US]].epilog-lcssa: +; CHECK-NEXT: br label %pfor.cond.cleanup1052.[[LOOPEXIT_US]] + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT_US]]: +; CHECK-NEXT: br label %pfor.cond.cleanup1052 + + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT]].strpm-lcssa.loopexit: +; CHECK-NEXT: br label %pfor.cond.cleanup1052.[[LOOPEXIT]].strpm-lcssa + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT]].strpm-lcssa: +; CHECK-NEXT: br i1 {{.+}}, label %pfor.cond864.epil.preheader, label %pfor.cond.cleanup1052.[[LOOPEXIT]] + +; CHECK: pfor.cond864.epil.preheader: +; CHECK-NEXT: br label %pfor.cond864.epil + +; CHECK: pfor.cond864.epil: +; CHECK-NEXT: %indvars.iv1730.epil = phi i64 +; CHECK-NEXT: phi i64 +; CHECK-NEXT: %indvars.iv.next1731.epil = add nuw nsw i64 %indvars.iv1730.epil, 1 +; CHECK-NEXT: br label %pfor.body870.epil + +; CHECK: pfor.body870.epil: +; CHECK-NOT: alloca +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.lifetime.start +; CHECK: br i1 {{.+}}, label %if.end888.epil, label %land.lhs.true874.epil + +; CHECK: if.end888.epil: +; CHECK: br i1 {{.+}}, label %pfor.preattach1048.epil, label %if.then893.loopexit1565.epil + +; CHECK: pfor.preattach1048.epil: +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: call void @llvm.lifetime.end +; CHECK: br label %pfor.inc1049.epil + +; CHECK: pfor.inc1049.epil: +; CHECK: br i1 {{.+}}, label %pfor.cond864.epil, label %pfor.cond.cleanup1052.[[LOOPEXIT]].epilog-lcssa + +; CHECK: if.then893.loopexit1565.epil: +; CHECK-NEXT: br label %if.then893.[[EPIL_SHAREDEH:.[a-z0-9.]+]] + + +; CHECK-NEW: if.then893.epil: +; CHECK-NEW-NEXT: switch i32 {{.+}}, label %if.end936.epil [ +; CHECK-NEW-NEXT: i32 0, label %land.lhs.true.i784.epil +; CHECK-NEW-NEXT: i32 1, label %land.lhs.true2.i795.epil +; CHECK-NEW-NEXT: ] + +; CHECK-NEW: if.end936.epil: +; CHECK-NEW-NEXT: br label %for.cond.i767.epil + +; CHECK-NEW: for.cond.i767.epil: +; CHECK-NEW-NEXT: br label %for.cond.i767.epil + +; CHECK-OLD: if.then893.epil4: +; CHECK-OLD-NEXT: switch i32 {{.+}}, label %if.end936.epil7 [ +; CHECK-OLD-NEXT: i32 0, label %land.lhs.true.i784.epil6 +; CHECK-OLD-NEXT: i32 1, label %land.lhs.true2.i795.epil5 +; CHECK-OLD-NEXT: ] + +; CHECK-OLD: if.end936.epil7: +; CHECK-OLD-NEXT: br label %for.cond.i767.epil8 + +; CHECK-OLD: for.cond.i767.epil8: +; CHECK-OLD-NEXT: br label %for.cond.i767.epil8 + + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT]].epilog-lcssa: +; CHECK-NEXT: br label %pfor.cond.cleanup1052.[[LOOPEXIT]] + +; CHECK: pfor.cond.cleanup1052.[[LOOPEXIT]]: +; CHECK-NEXT: br label %pfor.cond.cleanup1052 + +pfor.cond.cleanup1052: ; preds = %pfor.inc1049, %pfor.inc1049.us + sync within %syncreg.i, label %sync.continue1054 + +sync.continue1054: ; preds = %pfor.cond.cleanup1052 + unreachable + +cleanup1057: ; preds = %_ZN5timer4nextEv.exit871 + detach within %syncreg.i, label %pfor.body1080, label %pfor.inc1173 + +pfor.body1080: ; preds = %cleanup1057 + br label %if.then1101 + +if.then1101: ; preds = %pfor.body1080 + unreachable + +pfor.inc1173: ; preds = %cleanup1057 + sync within %syncreg.i, label %sync.continue1178 + +sync.continue1178: ; preds = %pfor.inc1173 + detach within %syncreg.i, label %pfor.body1203, label %pfor.inc1297 + +pfor.body1203: ; preds = %sync.continue1178 + br label %if.then1224 + +if.then1224: ; preds = %pfor.body1203 + unreachable + +pfor.inc1297: ; preds = %sync.continue1178 + sync within %syncreg.i, label %sync.continue1302 + +sync.continue1302: ; preds = %pfor.inc1297 + unreachable + +cleanup1350: ; preds = %cleanup427.thread + ret void + +; CHECK: if.then893.epil.sd: +; CHECK-NEXT: switch i32 {{.+}}, label %if.end936.epil.sd [ +; CHECK-NEXT: i32 0, label %land.lhs.true.i784.epil.sd +; CHECK-NEXT: i32 1, label %land.lhs.true2.i795.epil.sd +; CHECK-NEXT: ] + +; CHECK: if.end936.epil.sd: +; CHECK-NEXT: br label %for.cond.i767.epil.sd + +; CHECK: for.cond.i767.epil.sd: +; CHECK-NEXT: br label %for.cond.i767.epil.sd + +; CHECK: if.then893.strpm: +; CHECK-NEXT: switch i32 {{.+}}, label %if.end936.strpm [ +; CHECK-NEXT: i32 0, label %land.lhs.true.i784.strpm +; CHECK-NEXT: i32 1, label %land.lhs.true2.i795.strpm +; CHECK-NEXT: ] + +; CHECK: if.end936.strpm: +; CHECK-NEXT: br label %for.cond.i767.strpm + +; CHECK: for.cond.i767.strpm: +; CHECK-NEXT: br label %for.cond.i767.strpm +} + +attributes #0 = { argmemonly nounwind willreturn } +attributes #1 = { "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 12.0.0 (git@github.com:OpenCilk/opencilk-project.git 31ad596bd7126d79fa36fd82538084e8a8f4d913)"}