// REVIEW NOTES -- annotation only. Everything from the first `diff --git` on
// is the unchanged patch text.
//
// Purpose: remove TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck and
// the IVUpdateMayOverflow parameter that existed only to select it.
//
// Per file:
//  * Analysis/TargetTransformInfo.h: deletes the enum value together with its
//    doc comment; getPreferredTailFoldingStyle() loses its
//    `bool IVUpdateMayOverflow = true` parameter and the matching \param text.
//  * Analysis/TargetTransformInfoImpl.h, CodeGen/BasicTTIImpl.h,
//    Analysis/TargetTransformInfo.cpp: same signature change on the default
//    implementation, the BaseT forwarder and the TTIImpl forwarder.
//  * AArch64TargetTransformInfo.h: returns DataAndControlFlow when
//    ST->hasSVE(), DataWithoutLaneMask otherwise.
//  * ARMTargetTransformInfo.{h,cpp}, RISCVTargetTransformInfo.h: signature
//    change only in the hunks shown.
//  * Vectorize/LoopVectorize.cpp:
//      - drops "data-and-control-without-rt-check" from ForceTailFoldingStyle;
//      - ChosenTailFoldingStyle becomes a single TailFoldingStyle initialised
//        to None instead of an optional pair;
//      - setTailFoldingStyles() is renamed setTailFoldingStyle();
//      - getTailFoldingStyle() takes no argument;
//      - the addActiveLaneMask() call passes only (Plan, ForControlFlow).
//  * Vectorize/VPlanTransforms.{h,cpp}: addActiveLaneMask() and
//    addVPLaneMaskPhiAndUpdateExitBranch() lose their
//    DataAndControlFlowWithoutRuntimeCheck flag; the in-loop increment is
//    always built from CanonicalIVIncrement and the in-loop active-lane-mask
//    always takes TC (Plan.getTripCount()); the branch that created
//    VPInstruction::CalculateTripCountMinusVF is deleted.
//
// NOTE(review): setTailFoldingStyle() now asserts
//   ChosenTailFoldingStyle == TailFoldingStyle::None.
// Unlike the previous optional, this cannot tell "never selected" apart from
// "selected, and the answer was None", so a second call after a None result
// no longer trips the assert. Confirm that is acceptable, or keep a
// std::optional for the member.
//
// NOTE(review): with the enum value gone, the AArch64 SVE path always uses
// DataAndControlFlow, whose doc comment says the mask is no longer correct if
// the increment overflows. The code that guards against that overflow is not
// part of the hunks shown here -- confirm it is still emitted.
//
// NOTE(review): tryToBuildVPlanWithVPRecipes() still accumulates
// IVUpdateMayOverflow but no longer passes it to getTailFoldingStyle(). The
// following context comment about NUW suggests it is still read; verify that
// it has not become a dead variable.
//
// NOTE(review): addVPLaneMaskPhiAndUpdateExitBranch() keeps a local
// `VPValue *VF = &Plan.getVF();` but the in-loop increment still spells out
// `&Plan.getVF()`; presumably the local could be reused -- cosmetic only.
//
// NOTE(review): removing the "data-and-control-without-rt-check" option value
// presumably requires updating any tests or command lines that pass it, and
// any out-of-tree override of getPreferredTailFoldingStyle(bool) -- TODO
// confirm.
//
// NOTE(review): this copy of the patch appears whitespace-collapsed and seems
// to have lost angle-bracket template arguments (e.g. `cl::opt
// ForceTailFoldingStyle`, `std::optional>`, `cast(...)`, `IsaPred)`).
// Regenerate the patch from version control before trying to apply it.
//
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 11c73303348f3..4a8e0dbff7f84 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -248,10 +248,6 @@ enum class TailFoldingStyle { /// active.lane.mask to calculate the mask for the next iteration. If the /// increment overflows, the mask is no longer correct. DataAndControlFlow, - /// Use predicate to control both data and control flow, but modify - /// the trip count so that a runtime overflow check can be avoided - /// and such that the scalar epilogue loop can always be removed. - DataAndControlFlowWithoutRuntimeCheck, /// Use predicated EVL instructions for tail-folding. /// Indicates that VP intrinsics should be used. DataWithEVL, @@ -754,13 +750,7 @@ class TargetTransformInfo { LLVM_ABI bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const; /// Query the target what the preferred style of tail folding is. - /// \param IVUpdateMayOverflow Tells whether it is known if the IV update - /// may (or will never) overflow for the suggested VF/UF in the given loop. - /// Targets can use this information to select a more optimal tail folding - /// style. The value conservatively defaults to true, such that no assumptions - /// are made on overflow. 
- LLVM_ABI TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const; + LLVM_ABI TailFoldingStyle getPreferredTailFoldingStyle() const; // Parameters that control the loop peeling transformation struct PeelingPreferences { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index e062b70be6b59..6e5d7d308be21 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -278,8 +278,7 @@ class TargetTransformInfoImplBase { return false; } - virtual TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const { + virtual TailFoldingStyle getPreferredTailFoldingStyle() const { return TailFoldingStyle::DataWithoutLaneMask; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 43eb9b5b8aeb8..b0a7d11f08be7 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -801,9 +801,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::preferPredicateOverEpilogue(TFI); } - TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override { - return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow); + TailFoldingStyle getPreferredTailFoldingStyle() const override { + return BaseT::getPreferredTailFoldingStyle(); } std::optional diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index cbfe217e69d09..a196492641d53 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -387,9 +387,8 @@ bool TargetTransformInfo::preferPredicateOverEpilogue( return TTIImpl->preferPredicateOverEpilogue(TFI); } -TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle( - bool IVUpdateMayOverflow) const { - return 
TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow); +TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle() const { + return TTIImpl->getPreferredTailFoldingStyle(); } std::optional diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index f247e9e49e23f..c7716020da13c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -464,14 +464,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase { return ST->hasSVE() ? 5 : 0; } - TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { - if (ST->hasSVE()) - return IVUpdateMayOverflow - ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck - : TailFoldingStyle::DataAndControlFlow; - - return TailFoldingStyle::DataWithoutLaneMask; + TailFoldingStyle getPreferredTailFoldingStyle() const override { + return ST->hasSVE() ? TailFoldingStyle::DataAndControlFlow + : TailFoldingStyle::DataWithoutLaneMask; } bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index a7766d91af3f2..2a8ca56b17a92 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2670,8 +2670,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { *LVL->getDominatorTree()); } -TailFoldingStyle -ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const { +TailFoldingStyle ARMTTIImpl::getPreferredTailFoldingStyle() const { if (!ST->hasMVEIntegerOps() || !EnableTailPredication) return TailFoldingStyle::DataWithoutLaneMask; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 94804152d96ec..75a32d81a44aa 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ 
b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -432,8 +432,7 @@ class ARMTTIImpl final : public BasicTTIImplBase { TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override; - TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override; + TailFoldingStyle getPreferredTailFoldingStyle() const override; void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 424f9fe52c59e..97a364d741314 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -122,8 +122,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase { bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override { return ST->hasVInstructions(); } - TailFoldingStyle - getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { + TailFoldingStyle getPreferredTailFoldingStyle() const override { return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL : TailFoldingStyle::None; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bdbcada822e4a..36c8c0560c9eb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -243,9 +243,6 @@ static cl::opt ForceTailFoldingStyle( clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), - clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, - "data-and-control-without-rt-check", - "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. 
If EVL " "is unsupported, fallback to data-without-lane-mask."))); @@ -1327,34 +1324,27 @@ class LoopVectorizationCostModel { } /// Returns the TailFoldingStyle that is best for the current loop. - TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { - if (!ChosenTailFoldingStyle) - return TailFoldingStyle::None; - return IVUpdateMayOverflow ? ChosenTailFoldingStyle->first - : ChosenTailFoldingStyle->second; + TailFoldingStyle getTailFoldingStyle() const { + return ChosenTailFoldingStyle; } - /// Selects and saves TailFoldingStyle for 2 options - if IV update may - /// overflow or not. + /// Selects and saves TailFoldingStyle. /// \param IsScalableVF true if scalable vector factors enabled. /// \param UserIC User specific interleave count. - void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { - assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); + void setTailFoldingStyle(bool IsScalableVF, unsigned UserIC) { + assert(ChosenTailFoldingStyle == TailFoldingStyle::None && + "Tail folding must not be selected yet."); if (!Legal->canFoldTailByMasking()) { - ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None}; + ChosenTailFoldingStyle = TailFoldingStyle::None; return; } // Default to TTI preference, but allow command line override. 
- ChosenTailFoldingStyle = { - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)}; + ChosenTailFoldingStyle = TTI.getPreferredTailFoldingStyle(); if (ForceTailFoldingStyle.getNumOccurrences()) - ChosenTailFoldingStyle = {ForceTailFoldingStyle.getValue(), - ForceTailFoldingStyle.getValue()}; + ChosenTailFoldingStyle = ForceTailFoldingStyle.getValue(); - if (ChosenTailFoldingStyle->first != TailFoldingStyle::DataWithEVL && - ChosenTailFoldingStyle->second != TailFoldingStyle::DataWithEVL) + if (ChosenTailFoldingStyle != TailFoldingStyle::DataWithEVL) return; // Override EVL styles if needed. // FIXME: Investigate opportunity for fixed vector factor. @@ -1366,10 +1356,9 @@ class LoopVectorizationCostModel { // if it's allowed, or DataWithoutLaneMask otherwise. if (ScalarEpilogueStatus == CM_ScalarEpilogueAllowed || ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) - ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None}; + ChosenTailFoldingStyle = TailFoldingStyle::None; else - ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask, - TailFoldingStyle::DataWithoutLaneMask}; + ChosenTailFoldingStyle = TailFoldingStyle::DataWithoutLaneMask; LLVM_DEBUG( dbgs() << "LV: Preference for VP intrinsics indicated. Will " @@ -1381,8 +1370,6 @@ class LoopVectorizationCostModel { /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { - // TODO: check if it is possible to check for None style independent of - // IVUpdateMayOverflow flag in getTailFoldingStyle. 
return getTailFoldingStyle() != TailFoldingStyle::None; } @@ -1392,9 +1379,7 @@ class LoopVectorizationCostModel { if (!EnableWideActiveLaneMask) return false; - TailFoldingStyle TF = getTailFoldingStyle(); - return TF == TailFoldingStyle::DataAndControlFlow || - TF == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; + return getTailFoldingStyle() == TailFoldingStyle::DataAndControlFlow; } /// Return maximum safe number of elements to be processed per vector @@ -1606,10 +1591,8 @@ class LoopVectorizationCostModel { /// iterations to execute in the scalar loop. ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - /// Control finally chosen tail folding style. The first element is used if - /// the IV update may overflow, the second element - if it does not. - std::optional> - ChosenTailFoldingStyle; + /// Control finally chosen tail folding style. + TailFoldingStyle ChosenTailFoldingStyle = TailFoldingStyle::None; /// true if scalable vectorization is supported and enabled. std::optional IsScalableVectorizationAllowed; @@ -2098,13 +2081,11 @@ class GeneratedRTChecks { static bool useActiveLaneMask(TailFoldingStyle Style) { return Style == TailFoldingStyle::Data || - Style == TailFoldingStyle::DataAndControlFlow || - Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; + Style == TailFoldingStyle::DataAndControlFlow; } static bool useActiveLaneMaskForControlFlow(TailFoldingStyle Style) { - return Style == TailFoldingStyle::DataAndControlFlow || - Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; + return Style == TailFoldingStyle::DataAndControlFlow; } // Return true if \p OuterLp is an outer loop annotated with hints for explicit @@ -3727,7 +3708,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. 
bool ContainsScalableVF = MaxFactors.ScalableVF.isNonZero(); - setTailFoldingStyles(ContainsScalableVF, UserIC); + setTailFoldingStyle(ContainsScalableVF, UserIC); if (foldTailByMasking()) { if (foldTailWithEVL()) { LLVM_DEBUG( @@ -8203,7 +8184,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( for (ElementCount VF : Range) IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF); - TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); + TailFoldingStyle Style = CM.getTailFoldingStyle(); // Use NUW for the induction increment if we proved that it won't overflow in // the vector loop or when not folding the tail. In the later case, we know // that the canonical induction increment will not overflow as the vector trip @@ -8415,10 +8396,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // TODO: Move checks to VPlanTransforms::addActiveLaneMask once // TailFoldingStyle is visible there. bool ForControlFlow = useActiveLaneMaskForControlFlow(Style); - bool WithoutRuntimeCheck = - Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; - VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow, - WithoutRuntimeCheck); + VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow); } VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c6aabe7f1ec0f..54463809947e9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2825,33 +2825,23 @@ void VPlanTransforms::optimize(VPlan &Plan) { // dropped from the canonical IV increment. Return the created // VPActiveLaneMaskPHIRecipe. // -// The function uses the following definitions: -// -// %TripCount = DataWithControlFlowWithoutRuntimeCheck ? 
-// calculate-trip-count-minus-VF (original TC) : original TC -// %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ? -// CanonicalIVPhi : CanonicalIVIncrement -// %StartV is the canonical induction start value. -// // The function adds the following recipes: // // vector.ph: -// %TripCount = calculate-trip-count-minus-VF (original TC) -// [if DataWithControlFlowWithoutRuntimeCheck] -// %EntryInc = canonical-iv-increment-for-part %StartV -// %EntryALM = active-lane-mask %EntryInc, %TripCount +// %EntryInc = canonical-iv-increment-for-part CanonicalIVStart +// %EntryALM = active-lane-mask %EntryInc, TC // // vector.body: // ... // %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ] // ... -// %InLoopInc = canonical-iv-increment-for-part %IncrementValue -// %ALM = active-lane-mask %InLoopInc, TripCount +// %InLoopInc = canonical-iv-increment-for-part CanonicalIVIncrement +// %ALM = active-lane-mask %InLoopInc, TC // %Negated = Not %ALM // branch-on-cond %Negated // -static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( - VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { +static VPActiveLaneMaskPHIRecipe * +addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan) { VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); auto *CanonicalIVPHI = TopRegion->getCanonicalIV(); @@ -2859,8 +2849,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( auto *CanonicalIVIncrement = cast(CanonicalIVPHI->getBackedgeValue()); - // TODO: Check if dropping the flags is needed if - // !DataAndControlFlowWithoutRuntimeCheck. + // TODO: Check if dropping the flags is needed. 
CanonicalIVIncrement->dropPoisonGeneratingFlags(); DebugLoc DL = CanonicalIVIncrement->getDebugLoc(); // We can't use StartV directly in the ActiveLaneMask VPInstruction, since @@ -2871,24 +2860,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // Create the ActiveLaneMask instruction using the correct start values. VPValue *TC = Plan.getTripCount(); - VPValue *VFxUF = &Plan.getVFxUF(); VPValue *VF = &Plan.getVF(); - VPValue *TripCount, *IncrementValue; - if (!DataAndControlFlowWithoutRuntimeCheck) { - // When the loop is guarded by a runtime overflow check for the loop - // induction variable increment by VF, we can increment the value before - // the get.active.lane mask and use the unmodified tripcount. - IncrementValue = CanonicalIVIncrement; - TripCount = TC; - } else { - // When avoiding a runtime check, the active.lane.mask inside the loop - // uses a modified trip count and the induction variable increment is - // done after the active.lane.mask intrinsic is called. - IncrementValue = CanonicalIVPHI; - TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF, - {TC, VFxUF}, DL); - } auto *EntryIncrement = Builder.createOverflowingOp( VPInstruction::CanonicalIVIncrementForPart, {StartV, VF}, {false, false}, DL, "index.part.next"); @@ -2912,10 +2885,10 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( Builder.setInsertPoint(OriginalTerminator); auto *InLoopIncrement = Builder.createOverflowingOp( VPInstruction::CanonicalIVIncrementForPart, - {IncrementValue, &Plan.getVF()}, {false, false}, DL); + {CanonicalIVIncrement, &Plan.getVF()}, {false, false}, DL); auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, - {InLoopIncrement, TripCount, ALMMultiplier}, - DL, "active.lane.mask.next"); + {InLoopIncrement, TC, ALMMultiplier}, DL, + "active.lane.mask.next"); LaneMaskPhi->addOperand(ALM); // Replace the original terminator with BranchOnCond. 
We have to invert the @@ -2926,14 +2899,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } -void VPlanTransforms::addActiveLaneMask( - VPlan &Plan, bool UseActiveLaneMaskForControlFlow, - bool DataAndControlFlowWithoutRuntimeCheck) { - assert((!DataAndControlFlowWithoutRuntimeCheck || - UseActiveLaneMaskForControlFlow) && - "DataAndControlFlowWithoutRuntimeCheck implies " - "UseActiveLaneMaskForControlFlow"); - +void VPlanTransforms::addActiveLaneMask(VPlan &Plan, + bool UseActiveLaneMaskForControlFlow) { VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); auto *FoundWidenCanonicalIVUser = find_if( LoopRegion->getCanonicalIV()->users(), IsaPred); @@ -2944,7 +2911,7 @@ void VPlanTransforms::addActiveLaneMask( cast(*FoundWidenCanonicalIVUser); VPSingleDefRecipe *LaneMask; if (UseActiveLaneMaskForControlFlow) { - LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(Plan, false); + LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(Plan); } else { VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); VPValue *ALMMultiplier = diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 0dce486cb1c2c..16f7ae2daeb5e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -253,14 +253,9 @@ struct VPlanTransforms { /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p /// UseActiveLaneMaskForControlFlow is true, introduce an - /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is - /// true, no minimum-iteration runtime check will be created (during skeleton - /// creation) and instead it is handled using active-lane-mask. \p - /// DataAndControlFlowWithoutRuntimeCheck implies \p - /// UseActiveLaneMaskForControlFlow. + /// VPActiveLaneMaskPHIRecipe. 
static void addActiveLaneMask(VPlan &Plan, - bool UseActiveLaneMaskForControlFlow, - bool DataAndControlFlowWithoutRuntimeCheck); + bool UseActiveLaneMaskForControlFlow); /// Insert truncates and extends for any truncated recipe. Redundant casts /// will be folded later.