Conversation
Member
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-powerpc Author: Florian Hahn (fhahn) ChangesReverts llvm/llvm-project#185969 Patch is 113.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187504.diff 34 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c8630d56ea06f..9dc7e79651272 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2423,6 +2423,20 @@ BasicBlock *InnerLoopVectorizer::createScalarPreheader(StringRef Prefix) {
Twine(Prefix) + "scalar.ph");
}
+/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
+/// expansion results.
+static Value *getExpandedStep(const InductionDescriptor &ID,
+ const SCEV2ValueTy &ExpandedSCEVs) {
+ const SCEV *Step = ID.getStep();
+ if (auto *C = dyn_cast<SCEVConstant>(Step))
+ return C->getValue();
+ if (auto *U = dyn_cast<SCEVUnknown>(Step))
+ return U->getValue();
+ Value *V = ExpandedSCEVs.lookup(Step);
+ assert(V && "SCEV must be expanded at this point");
+ return V;
+}
+
/// Knowing that loop \p L executes a single vector iteration, add instructions
/// that will get simplified and thus should not have any cost to \p
/// InstsToIgnore.
@@ -7329,6 +7343,73 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
return BestFactor;
}
+// If \p EpiResumePhiR is resume VPPhi for a reduction when vectorizing the
+// epilog loop, fix the reduction's scalar PHI node by adding the incoming value
+// from the main vector loop.
+static void fixReductionScalarResumeWhenVectorizingEpilog(
+ VPPhi *EpiResumePhiR, PHINode &EpiResumePhi, BasicBlock *BypassBlock) {
+ using namespace VPlanPatternMatch;
+ // Get the VPInstruction computing the reduction result in the middle block.
+ // The first operand may not be from the middle block if it is not connected
+ // to the scalar preheader. In that case, there's nothing to fix.
+ VPValue *Incoming = EpiResumePhiR->getOperand(0);
+ match(Incoming, VPlanPatternMatch::m_ZExtOrSExt(
+ VPlanPatternMatch::m_VPValue(Incoming)));
+ auto *EpiRedResult = dyn_cast<VPInstruction>(Incoming);
+ if (!EpiRedResult)
+ return;
+
+ VPValue *BackedgeVal;
+ bool IsFindIV = false;
+ if (EpiRedResult->getOpcode() == VPInstruction::ComputeAnyOfResult ||
+ EpiRedResult->getOpcode() == VPInstruction::ComputeReductionResult)
+ BackedgeVal = EpiRedResult->getOperand(EpiRedResult->getNumOperands() - 1);
+ else if (matchFindIVResult(EpiRedResult, m_VPValue(BackedgeVal), m_VPValue()))
+ IsFindIV = true;
+ else
+ return;
+
+ auto *EpiRedHeaderPhi = cast_if_present<VPReductionPHIRecipe>(
+ vputils::findRecipe(BackedgeVal, IsaPred<VPReductionPHIRecipe>));
+ if (!EpiRedHeaderPhi) {
+ match(BackedgeVal,
+ VPlanPatternMatch::m_Select(VPlanPatternMatch::m_VPValue(),
+ VPlanPatternMatch::m_VPValue(BackedgeVal),
+ VPlanPatternMatch::m_VPValue()));
+ EpiRedHeaderPhi = cast<VPReductionPHIRecipe>(
+ vputils::findRecipe(BackedgeVal, IsaPred<VPReductionPHIRecipe>));
+ }
+
+ Value *MainResumeValue;
+ if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
+ assert((VPI->getOpcode() == VPInstruction::Broadcast ||
+ VPI->getOpcode() == VPInstruction::ReductionStartVector) &&
+ "unexpected start recipe");
+ MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
+ } else
+ MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
+ if (EpiRedResult->getOpcode() == VPInstruction::ComputeAnyOfResult) {
+ [[maybe_unused]] Value *StartV =
+ EpiRedResult->getOperand(0)->getLiveInIRValue();
+ auto *Cmp = cast<ICmpInst>(MainResumeValue);
+ assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
+ "AnyOf expected to start with ICMP_NE");
+ assert(Cmp->getOperand(1) == StartV &&
+ "AnyOf expected to start by comparing main resume value to original "
+ "start value");
+ MainResumeValue = Cmp->getOperand(0);
+ } else if (IsFindIV) {
+ MainResumeValue = cast<SelectInst>(MainResumeValue)->getFalseValue();
+ }
+ PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
+
+ // When fixing reductions in the epilogue loop we should already have
+ // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
+ // over the incoming values correctly.
+ EpiResumePhi.setIncomingValueForBlock(
+ BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock));
+}
+
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
@@ -8913,9 +8994,35 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
!EnableLoopVectorization) {}
/// Prepare \p MainPlan for vectorizing the main vector loop during epilogue
-/// vectorization.
-static SmallVector<VPInstruction *>
-preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
+/// vectorization. Remove ResumePhis from \p MainPlan for inductions that
+/// don't have a corresponding wide induction in \p EpiPlan.
+static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
+ // Collect PHI nodes of widened phis in the VPlan for the epilogue. Those
+ // will need their resume-values computed in the main vector loop. Others
+ // can be removed from the main VPlan.
+ SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
+ for (VPRecipeBase &R :
+ EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ EpiWidenedPhis.insert(
+ cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
+ }
+ for (VPRecipeBase &R :
+ make_early_inc_range(MainPlan.getScalarHeader()->phis())) {
+ auto *VPIRInst = cast<VPIRPhi>(&R);
+ if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi()))
+ continue;
+ // There is no corresponding wide induction in the epilogue plan that would
+ // need a resume value. Remove the VPIRInst wrapping the scalar header phi
+ // together with the corresponding ResumePhi. The resume values for the
+ // scalar loop will be created during execution of EpiPlan.
+ VPRecipeBase *ResumePhi = VPIRInst->getOperand(0)->getDefiningRecipe();
+ VPIRInst->eraseFromParent();
+ ResumePhi->eraseFromParent();
+ }
+ RUN_VPLAN_PASS(VPlanTransforms::removeDeadRecipes, MainPlan);
+
using namespace VPlanPatternMatch;
// When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can
// introduce multiple uses of undef/poison. If the reduction start value may
@@ -8976,26 +9083,14 @@ preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
{VectorTC, MainPlan.getZero(Ty)}, {}, "vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
- ResumePhi->setName("vec.epilog.resume.val");
- if (&MainScalarPH->front() != ResumePhi)
+ if (MainScalarPH->begin() == MainScalarPH->end())
+ ResumePhi->moveBefore(*MainScalarPH, MainScalarPH->end());
+ else if (&*MainScalarPH->begin() != ResumePhi)
ResumePhi->moveBefore(*MainScalarPH, MainScalarPH->begin());
}
-
- // Create a ResumeForEpilogue for the canonical IV resume as the
- // first non-phi, to keep it alive for the epilogue.
- VPBuilder ResumeBuilder(MainScalarPH);
- ResumeBuilder.createNaryOp(VPInstruction::ResumeForEpilogue, ResumePhi);
-
- // Create ResumeForEpilogue instructions for the resume phis of the
- // VPIRPhis in the scalar header of the main plan and return them so they can
- // be used as resume values when vectorizing the epilogue.
- return to_vector(
- map_range(MainPlan.getScalarHeader()->phis(), [&](VPRecipeBase &R) {
- assert(isa<VPIRPhi>(R) &&
- "only VPIRPhis expected in the scalar header");
- return ResumeBuilder.createNaryOp(VPInstruction::ResumeForEpilogue,
- R.getOperand(0));
- }));
+ // Add a user to to make sure the resume phi won't get removed.
+ VPBuilder(MainScalarPH)
+ .createNaryOp(VPInstruction::ResumeForEpilogue, ResumePhi);
}
/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
@@ -9203,11 +9298,39 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
return InstsToMove;
}
-static void
-fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L,
- VPlan &BestEpiPlan,
- ArrayRef<VPInstruction *> ResumeValues) {
- // Fix resume values from the additional bypass block.
+// Generate bypass values from the additional bypass block. Note that when the
+// vectorized epilogue is skipped due to iteration count check, then the
+// resume value for the induction variable comes from the trip count of the
+// main vector loop, passed as the second argument.
+static Value *createInductionAdditionalBypassValues(
+ PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder,
+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
+ Instruction *OldInduction) {
+ Value *Step = getExpandedStep(II, ExpandedSCEVs);
+ // For the primary induction the additional bypass end value is known.
+ // Otherwise it is computed.
+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
+ if (OrigPhi != OldInduction) {
+ auto *BinOp = II.getInductionBinOp();
+ // Fast-math-flags propagate from the original induction instruction.
+ if (isa_and_nonnull<FPMathOperator>(BinOp))
+ BypassBuilder.setFastMathFlags(BinOp->getFastMathFlags());
+
+ // Compute the end value for the additional bypass.
+ EndValueFromAdditionalBypass =
+ emitTransformedIndex(BypassBuilder, MainVectorTripCount,
+ II.getStartValue(), Step, II.getKind(), BinOp);
+ EndValueFromAdditionalBypass->setName("ind.end");
+ }
+ return EndValueFromAdditionalBypass;
+}
+
+static void fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L,
+ VPlan &BestEpiPlan,
+ LoopVectorizationLegality &LVL,
+ const SCEV2ValueTy &ExpandedSCEVs,
+ Value *MainVectorTripCount) {
+ // Fix reduction resume values from the additional bypass block.
BasicBlock *PH = L->getLoopPreheader();
for (auto *Pred : predecessors(PH)) {
for (PHINode &Phi : PH->phis()) {
@@ -9218,36 +9341,54 @@ fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L,
}
auto *ScalarPH = cast<VPIRBasicBlock>(BestEpiPlan.getScalarPreheader());
if (ScalarPH->hasPredecessors()) {
- // Fix resume values for inductions and reductions from the additional
- // bypass block using the incoming values from the main loop's resume phis.
- // ResumeValues correspond 1:1 with the scalar loop header phis.
- for (auto [ResumeV, HeaderPhi] :
- zip(ResumeValues, BestEpiPlan.getScalarHeader()->phis())) {
- auto *HeaderPhiR = cast<VPIRPhi>(&HeaderPhi);
- if (isa<VPIRValue>(HeaderPhiR->getIncomingValueForBlock(ScalarPH)))
- continue;
- auto *EpiResumePhi =
- cast<PHINode>(HeaderPhiR->getIRPhi().getIncomingValueForBlock(PH));
- if (EpiResumePhi->getBasicBlockIndex(BypassBlock) == -1)
- continue;
- auto *MainResumePhi = cast<PHINode>(ResumeV->getUnderlyingValue());
- EpiResumePhi->setIncomingValueForBlock(
- BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock));
+ // If ScalarPH has predecessors, we may need to update its reduction
+ // resume values.
+ for (const auto &[R, IRPhi] :
+ zip(ScalarPH->phis(), ScalarPH->getIRBasicBlock()->phis())) {
+ fixReductionScalarResumeWhenVectorizingEpilog(cast<VPPhi>(&R), IRPhi,
+ BypassBlock);
+ }
+ }
+
+ // Fix induction resume values from the additional bypass block.
+ IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt());
+ for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
+ Value *V = createInductionAdditionalBypassValues(
+ IVPhi, II, BypassBuilder, ExpandedSCEVs, MainVectorTripCount,
+ LVL.getPrimaryInduction());
+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
+ if (auto *Inc = dyn_cast<PHINode>(IVPhi->getIncomingValueForBlock(PH))) {
+ if (Inc->getBasicBlockIndex(BypassBlock) != -1)
+ Inc->setIncomingValueForBlock(BypassBlock, V);
+ } else {
+ // If the resume value in the scalar preheader was simplified (e.g., when
+ // narrowInterleaveGroups optimized away the resume PHIs), create a new
+ // PHI to merge the bypass value with the original value.
+ Value *OrigVal = IVPhi->getIncomingValueForBlock(PH);
+ PHINode *NewPhi =
+ PHINode::Create(IVPhi->getType(), pred_size(PH), "bc.resume.val",
+ PH->getFirstNonPHIIt());
+ for (auto *Pred : predecessors(PH)) {
+ if (Pred == BypassBlock)
+ NewPhi->addIncoming(V, Pred);
+ else
+ NewPhi->addIncoming(OrigVal, Pred);
+ }
+ IVPhi->setIncomingValueForBlock(PH, NewPhi);
}
}
}
/// Connect the epilogue vector loop generated for \p EpiPlan to the main vector
-/// loop, after both plans have executed, updating branches from the iteration
-/// and runtime checks of the main loop, as well as updating various phis. \p
-/// InstsToMove contains instructions that need to be moved to the preheader of
-/// the epilogue vector loop.
-static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L,
- EpilogueLoopVectorizationInfo &EPI,
- DominatorTree *DT,
- GeneratedRTChecks &Checks,
- ArrayRef<Instruction *> InstsToMove,
- ArrayRef<VPInstruction *> ResumeValues) {
+// loop, after both plans have executed, updating branches from the iteration
+// and runtime checks of the main loop, as well as updating various phis. \p
+// InstsToMove contains instructions that need to be moved to the preheader of
+// the epilogue vector loop.
+static void connectEpilogueVectorLoop(
+ VPlan &EpiPlan, Loop *L, EpilogueLoopVectorizationInfo &EPI,
+ DominatorTree *DT, LoopVectorizationLegality &LVL,
+ DenseMap<const SCEV *, Value *> &ExpandedSCEVs, GeneratedRTChecks &Checks,
+ ArrayRef<Instruction *> InstsToMove) {
BasicBlock *VecEpilogueIterationCountCheck =
cast<VPIRBasicBlock>(EpiPlan.getEntry())->getIRBasicBlock();
@@ -9330,13 +9471,7 @@ static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L,
// after executing the main loop. We need to update the resume values of
// inductions and reductions during epilogue vectorization.
fixScalarResumeValuesFromBypass(VecEpilogueIterationCountCheck, L, EpiPlan,
- ResumeValues);
-
- // Remove dead phis that were moved to the epilogue preheader but are unused
- // (e.g., resume phis for inductions not widened in the epilogue vector loop).
- for (PHINode &Phi : make_early_inc_range(VecEpiloguePreHeader->phis()))
- if (Phi.use_empty())
- Phi.eraseFromParent();
+ LVL, ExpandedSCEVs, EPI.VectorTripCount);
}
bool LoopVectorizePass::processLoop(Loop *L) {
@@ -9723,8 +9858,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width);
BestEpiPlan.getMiddleBlock()->setName("vec.epilog.middle.block");
BestEpiPlan.getVectorPreheader()->setName("vec.epilog.ph");
- SmallVector<VPInstruction *> ResumeValues =
- preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
+ preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
BestEpiPlan);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
@@ -9741,8 +9875,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE());
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
true);
- connectEpilogueVectorLoop(BestEpiPlan, L, EPI, DT, Checks, InstsToMove,
- ResumeValues);
+ connectEpilogueVectorLoop(BestEpiPlan, L, EPI, DT, LVL, ExpandedSCEVs,
+ Checks, InstsToMove);
++LoopsEpilogueVectorized;
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4748b880721fa..00c9cbad9dfd6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1322,12 +1322,6 @@ void VPInstruction::execute(VPTransformState &State) {
"scalar value but not only first lane defined");
State.set(this, GeneratedValue,
/*IsScalar*/ GeneratesPerFirstLaneOnly);
- if (getOpcode() == VPInstruction::ResumeForEpilogue) {
- // FIXME: This is a workaround to enable reliable updates of the scalar loop
- // resume phis, when vectorizing the epilogue. Must be removed once epilogue
- // vectorization explicitly connects VPlans.
- setUnderlyingValue(GeneratedValue);
- }
}
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index ae1f7829449da..4eced1640bd14 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -46,6 +46,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 8
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
@@ -83,7 +84,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index efa64c661ebc0..44636222a8648 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -446,8 +446,6 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -467,6 ...
[truncated]
|
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 23, 2026
…logue vec." (llvm#187504)" This reverts commit cdaf29f.
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 23, 2026
…logue vec." (llvm#187504)" This reverts commit cdaf29f.
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 23, 2026
…ec." (llvm#187504) This reverts commit cdaf29f. This version skips removeBranchOnConst when vectorizing the epilogue, as it may trigger folds that remove the resume phi used as resume value from the epilogue. This fixes llvm#187323. Original message: This patch tries to drastically simplify resume value handling for the scalar loop when vectorizing the epilogue. It uses a simpler, uniform approach for updating all resume values in the scalar loop: 1. Create ResumeForEpilogue recipes for all scalar resume phis in the main loop (the epilogue plan will have exactly the same scalar resume phis, in exactly the same order) 2. Update ::execute for ResumeForEpilogue to set the underlying value when executing. This is not super clean, but allows easy lookup of the generated IR value when we update the resume phis in the epilogue. Once we connect the 2 plans together explicitly, this can be removed. 3. Use the list of ResumeForEpilogue VPInstructions from the main loop to update the resume/bypass values from the epilogue. This simplifies the code quite a bit, makes it more robust (should fix llvm#179407) and also fixes a mis-compile in the existing tests (see change in llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll, where previously we would incorrectly resume using the start value when the epilogue iteration check failed) In some cases, we get simpler code, due to additional CSE, in some cases the induction end value computations get moved from the epilogue iteration check to the vector preheader. We could try to sink the instructions as cleanup, but it is probably not worth the trouble. Fixes llvm#179407.
fhahn
added a commit
that referenced
this pull request
Mar 23, 2026
…ec." (#187504) This reverts commit cdaf29f. This version skips removeBranchOnConst when vectorizing the epilogue, as it may trigger folds that remove the resume phi used as resume value from the epilogue. This fixes #187323. Original message: This patch tries to drastically simplify resume value handling for the scalar loop when vectorizing the epilogue. It uses a simpler, uniform approach for updating all resume values in the scalar loop: 1. Create ResumeForEpilogue recipes for all scalar resume phis in the main loop (the epilogue plan will have exactly the same scalar resume phis, in exactly the same order) 2. Update ::execute for ResumeForEpilogue to set the underlying value when executing. This is not super clean, but allows easy lookup of the generated IR value when we update the resume phis in the epilogue. Once we connect the 2 plans together explicitly, this can be removed. 3. Use the list of ResumeForEpilogue VPInstructions from the main loop to update the resume/bypass values from the epilogue. This simplifies the code quite a bit, makes it more robust (should fix #179407) and also fixes a mis-compile in the existing tests (see change in llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll, where previously we would incorrectly resume using the start value when the epilogue iteration check failed) In some cases, we get simpler code, due to additional CSE, in some cases the induction end value computations get moved from the epilogue iteration check to the vector preheader. We could try to sink the instructions as cleanup, but it is probably not worth the trouble. Fixes #179407. PR for recommit #188134
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 25, 2026
… and UF. (llvm#181252)" This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 25, 2026
… and UF. (llvm#181252)" This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
fhahn
added a commit
to fhahn/llvm-project
that referenced
this pull request
Mar 26, 2026
… and UF. (llvm#181252)" This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
fhahn
added a commit
that referenced
this pull request
Mar 26, 2026
… and UF. (#181252)" (#188589) This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: #181252
ambergorzynski
pushed a commit
to ambergorzynski/llvm-project
that referenced
this pull request
Mar 27, 2026
…ec." (llvm#187504) This reverts commit cdaf29f. This version skips removeBranchOnConst when vectorizing the epilogue, as it may trigger folds that remove the resume phi used as resume value from the epilogue. This fixes llvm#187323. Original message: This patch tries to drastically simplify resume value handling for the scalar loop when vectorizing the epilogue. It uses a simpler, uniform approach for updating all resume values in the scalar loop: 1. Create ResumeForEpilogue recipes for all scalar resume phis in the main loop (the epilogue plan will have exactly the same scalar resume phis, in exactly the same order) 2. Update ::execute for ResumeForEpilogue to set the underlying value when executing. This is not super clean, but allows easy lookup of the generated IR value when we update the resume phis in the epilogue. Once we connect the 2 plans together explicitly, this can be removed. 3. Use the list of ResumeForEpilogue VPInstructions from the main loop to update the resume/bypass values from the epilogue. This simplifies the code quite a bit, makes it more robust (should fix llvm#179407) and also fixes a mis-compile in the existing tests (see change in llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll, where previously we would incorrectly resume using the start value when the epilogue iteration check failed) In some cases, we get simpler code, due to additional CSE, in some cases the induction end value computations get moved from the epilogue iteration check to the vector preheader. We could try to sink the instructions as cleanup, but it is probably not worth the trouble. Fixes llvm#179407. PR for recommit llvm#188134
ambergorzynski
pushed a commit
to ambergorzynski/llvm-project
that referenced
this pull request
Mar 27, 2026
… and UF. (llvm#181252)" (llvm#188589) This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
albertbolt1
pushed a commit
to albertbolt1/llvm-project
that referenced
this pull request
Mar 28, 2026
…c." (llvm#187504) Reverts llvm#185969 This is suspected to cause a miscompile in 549.fotonik3d_r from SPEC 2017 FP
Aadarsh-Keshri
pushed a commit
to Aadarsh-Keshri/llvm-project
that referenced
this pull request
Mar 28, 2026
…ec." (llvm#187504) This reverts commit cdaf29f. This version skips removeBranchOnConst when vectorizing the epilogue, as it may trigger folds that remove the resume phi used as resume value from the epilogue. This fixes llvm#187323. Original message: This patch tries to drastically simplify resume value handling for the scalar loop when vectorizing the epilogue. It uses a simpler, uniform approach for updating all resume values in the scalar loop: 1. Create ResumeForEpilogue recipes for all scalar resume phis in the main loop (the epilogue plan will have exactly the same scalar resume phis, in exactly the same order) 2. Update ::execute for ResumeForEpilogue to set the underlying value when executing. This is not super clean, but allows easy lookup of the generated IR value when we update the resume phis in the epilogue. Once we connect the 2 plans together explicitly, this can be removed. 3. Use the list of ResumeForEpilogue VPInstructions from the main loop to update the resume/bypass values from the epilogue. This simplifies the code quite a bit, makes it more robust (should fix llvm#179407) and also fixes a mis-compile in the existing tests (see change in llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll, where previously we would incorrectly resume using the start value when the epilogue iteration check failed) In some cases, we get simpler code, due to additional CSE, in some cases the induction end value computations get moved from the epilogue iteration check to the vector preheader. We could try to sink the instructions as cleanup, but it is probably not worth the trouble. Fixes llvm#179407. PR for recommit llvm#188134
Aadarsh-Keshri
pushed a commit
to Aadarsh-Keshri/llvm-project
that referenced
this pull request
Mar 28, 2026
… and UF. (llvm#181252)" (llvm#188589) This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
fzou1
pushed a commit
to fzou1/llvm-project
that referenced
this pull request
Mar 30, 2026
… and UF. (llvm#181252)" (llvm#188589) This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
Himadhith
pushed a commit
to Himadhith/llvm-project
that referenced
this pull request
Apr 2, 2026
… and UF. (llvm#181252)" (llvm#188589) This reverts commit e30f9c1. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb848 (llvm#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: llvm#181252
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Reverts #185969